library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(dendextend)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(colorspace) ; library(corrplot)
library(GGally) ; library(ggpubr) ; library(ggExtra)
library(WGCNA)
library(expss)
library(polycor)
library(biomaRt)
library(clusterProfiler) ; library(ReactomePA) ; library(DOSE) ; library(org.Hs.eg.db)
library(foreach) ; library(doParallel)
library(knitr) ; library(kableExtra) ; library(xtable)
# Colours used for the SFARI gene scores / control groups
#
# r: numeric (or logical) index, or vector of indices, into the fixed 10-colour palette below
# Returns a character vector with the hex codes / colour names at positions `r`
SFARI_colour_hue <- function(r) {
  palette <- c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9','#b3b3b3','#808080','gray','#d9d9d9')
  # Return the selection visibly (ending the function on an assignment hides the value at the console)
  palette[r]
}
# Get colors from the ggplot palette
#
# n: number of colours wanted (non-negative integer)
# Returns a character vector of `n` hex colours with hues evenly spaced between 15 and 375 degrees
# (luminance 65, chroma 100); an empty character vector when n is 0
gg_colour_hue <- function(n) {
  # n + 1 points are generated because the last hue (375) is the same angle as the first (15)
  hues <- seq(15, 375, length.out = n + 1)
  # seq_len() instead of 1:n so that n = 0 yields no colours rather than one
  hcl(h = hues, l = 65, c = 100)[seq_len(n)]
}
# Assign an HCL rainbow colour to each module
#
# mods: vector (character or factor) with the module label of every gene
# Returns a named character vector of colours, one per module label as listed by table(mods).
# The first label in table order gets 'white' (presumably the unassigned 'gray' module -- confirm
# against the labels in use); the remaining labels get the ggplot hues in a fixed random order.
# Side effect: resets the global RNG seed to 123, so the colour assignment is reproducible.
get_mod_colours <- function(mods){
  # Use the same label set for counting and for naming, so both always have the same length
  # (unique() and table() disagree when there are NAs or unused factor levels)
  mod_labels <- names(table(mods))
  n <- length(mod_labels) - 1
  if(n < 0) return(character(0))
  set.seed(123)
  # sample.int() handles n = 0 correctly, whereas sample(1:n) would permute c(1, 0)
  rand_order <- sample.int(n)
  mod_colors <- c('white', gg_colour_hue(n)[rand_order])
  names(mod_colors) <- mod_labels
  return(mod_colors)
}
# Compare results from GSEA and ORA
#
# For each module in `top_modules` prints (with cat, intended for an 'asis' chunk):
#   - the number of rows in the GSEA and in the ORA result for `database`
#   - the number of terms shared by both methods
#   - a kable table of the shared terms, sorted by p.adjust_ORA + p.adjust_GSEA
#   - the HGNC symbols of the genes behind the top 5 shared terms (only when there are at least 5)
#   - the HGNC symbols of the genes behind all the shared terms
#
# GSEA_list, ORA_list:     lists indexed by module and then by database; each element has a @result slot
# top_modules_enrichment:  list indexed by module and then by database with the shared terms
#                          (columns ID, Description.x, p.adjust_ORA, p.adjust_GSEA, qvalue_ORA, GeneRatio, geneID)
# top_modules:             module labels to report
# database:                name of the database to report (e.g. 'GO')
#
# Relies on the global data frames `genes_info` (Module, module_number) and
# `gene_names` (entrezgene, hgnc_symbol). Called for its printed output only.
compare_methods <- function(GSEA_list, ORA_list, top_modules_enrichment, top_modules, database){

  # Translate Entrez IDs into a sorted vector of unique HGNC symbols
  symbols_for <- function(entrez_ids){
    gene_names %>% dplyr::filter(entrezgene %in% entrez_ids) %>% dplyr::pull(hgnc_symbol) %>%
      unique %>% sort
  }

  for(module in top_modules){

    cat(paste0(' \n \n Enrichment results for cluster ',
               genes_info$module_number[genes_info$Module==module][1], ': \n'))
    cat(paste0('- GSEA has ', nrow(GSEA_list[[module]][[database]]@result), ' enriched term(s) \n'))
    cat(paste0('- ORA has ', nrow(ORA_list[[module]][[database]]@result), ' enriched term(s) \n'))
    cat(paste0('- ', nrow(top_modules_enrichment[[module]][[database]]),
               ' terms are enriched in both methods \n \n'))

    # Rank the shared terms once, so the table and the 'top 5' gene list refer to the same terms
    ranked_terms <- top_modules_enrichment[[module]][[database]] %>%
      dplyr::mutate(pval_mean = p.adjust_ORA + p.adjust_GSEA) %>%
      dplyr::arrange(pval_mean)

    if(nrow(ranked_terms) == 0) next

    print(ranked_terms %>%
            dplyr::select(ID, Description.x, p.adjust_ORA, p.adjust_GSEA, qvalue_ORA, GeneRatio) %>%
            dplyr::rename('Description' = Description.x) %>%
            kable %>% kable_styling(full_width = FALSE))

    # Genes involved: geneID holds the Entrez IDs of each term separated by '/'
    genes_by_term <- strsplit(ranked_terms$geneID, '/')

    if(length(genes_by_term) >= 5){
      cat(paste0('Genes involved in top 5 enriched terms: ',
                 paste(symbols_for(unlist(genes_by_term[1:5])), collapse = ', '),'\n'))
    }

    # Always printed: the previous post-loop counter check skipped this line (and printed no
    # genes at all) when a module had exactly 4 shared terms
    cat(paste0('\nGenes involved in all enriched terms: ',
               paste(symbols_for(unlist(genes_by_term)), collapse = ', ')))
  }
}
# Interactive scatter plots of the adjusted p-values of GSEA (x) against ORA (y) for the shared terms
#
# top_modules_enrichment: list indexed by module and then by database with the terms shared by both
#                         methods (columns Description.x, p.adjust_GSEA, p.adjust_ORA, NES)
# top_modules:            module labels to plot
# database:               name of the database to plot (e.g. 'GO')
#
# Returns an htmltools tagList with one plotly widget per module, stored at the module's position
# in `top_modules`. Only modules with more than 5 shared terms are plotted; the positions of the
# other modules are left unset. Relies on the global `genes_info` for the cluster number in the title.
plot_results <- function(top_modules_enrichment, top_modules, database){

  widgets <- htmltools::tagList()

  # seq_along() so an empty `top_modules` returns an empty tagList instead of failing on index 0/NA
  for(i in seq_along(top_modules)){

    plot_data <- top_modules_enrichment[[top_modules[i]]][[database]] %>%
      dplyr::rename('Description' = Description.x)

    if(nrow(plot_data) <= 5) next

    # Same limits on both axes; the upper limit always includes the 0.05 reference lines
    min_val <- min(plot_data$p.adjust_GSEA, plot_data$p.adjust_ORA)
    max_val <- max(plot_data$p.adjust_GSEA, plot_data$p.adjust_ORA, 0.05)

    ggp <- ggplotly(plot_data %>% ggplot(aes(p.adjust_GSEA, p.adjust_ORA, color = NES)) +
                      # `id` is not a ggplot aesthetic; it is passed along so plotly shows the term on hover
                      geom_point(aes(id = Description)) +
                      geom_vline(xintercept = 0.05, color = 'gray', linetype = 'dotted') +
                      geom_hline(yintercept = 0.05, color = 'gray', linetype = 'dotted') +
                      ggtitle(paste0('Enriched terms in common for cluster ',
                                     genes_info$module_number[genes_info$Module==top_modules[i]][1])) +
                      scale_x_continuous(limits = c(min_val, max_val)) +
                      scale_y_continuous(limits = c(min_val, max_val)) +
                      xlab('Corrected p-value for GSEA') + ylab('Corrected p-value for ORA') +
                      scale_colour_viridis(direction = -1) + theme_minimal() + coord_fixed())

    widgets[[i]] <- ggp
  }

  return(widgets)
}
# plot_shared_genes(top_modules_enrichment, top_modules, 'GO')
#
# Draws, for each module, a corrplot with the fraction of genes shared between each pair of its
# top shared terms (size of the intersection divided by size of the union of the two gene sets).
#
# top_modules_enrichment: list indexed by module and then by database with the terms shared by
#                         GSEA and ORA (columns ID, geneID, p.adjust_ORA, p.adjust_GSEA)
# top_modules:            module labels to plot
# database:               name of the database to plot (e.g. 'GO')
#
# Only the 5 terms with the lowest p.adjust_ORA + p.adjust_GSEA are used, and modules with fewer
# than 2 shared terms are skipped. Relies on the global `genes_info` for the cluster number in the
# title. Called for its plotting side effect.
plot_shared_genes <- function(top_modules_enrichment, top_modules, database){

  # seq_along() so an empty `top_modules` is a no-op instead of failing on index 0/NA
  for(tm in seq_along(top_modules)){

    top_terms <- top_modules_enrichment[[top_modules[tm]]][[database]] %>%
      dplyr::mutate(pval_mean = p.adjust_ORA + p.adjust_GSEA) %>% dplyr::arrange(pval_mean) %>%
      dplyr::select(ID, geneID)

    if(nrow(top_terms) < 2) next

    top_terms <- top_terms %>% dplyr::slice_head(n = 5)

    # Split every '/'-separated gene list once, instead of once per pair of terms
    gene_sets <- strsplit(top_terms$geneID, '/')
    n_terms <- length(gene_sets)

    # The diagonal stays at 0; it is not drawn (diag = FALSE below)
    shared_genes <- matrix(0, n_terms, n_terms)
    for(i in seq_len(n_terms - 1)){
      for(j in (i + 1):n_terms){
        # intersect/union work on distinct genes, so a repeated ID cannot inflate the ratio
        overlap <- length(intersect(gene_sets[[i]], gene_sets[[j]])) /
          length(union(gene_sets[[i]], gene_sets[[j]]))
        shared_genes[i,j] <- overlap
        shared_genes[j,i] <- overlap
      }
    }
    rownames(shared_genes) <- top_terms$ID
    colnames(shared_genes) <- top_terms$ID

    corrplot(shared_genes, type = 'lower', method = 'square', diag = FALSE, number.digits = 2, cl.pos = 'n',
             tl.pos = 'ld', tl.col = '#666666', order = 'hclust', col.lim = c(0,1), addCoef.col = 'black',
             mar = c(0,0,2,0), tl.cex = 0.8, number.cex= 0.8,
             title = paste0('Genes in common for top terms in cluster ',
                            genes_info$module_number[genes_info$Module==top_modules[tm]][1]))
  }
}
# Print table with top results (for annex in thesis)
#
# top_modules_enrichment: list indexed by module and then by database with the terms shared by
#                         GSEA and ORA
# module:                 label of the module to report
# database:               name of the database to report
# n:                      number of terms to keep (those with the lowest p.adjust_ORA + p.adjust_GSEA)
#
# Prints the selected terms as an xtable (without row names) and returns whatever print() returns
print_table_w_top_results <- function(top_modules_enrichment, module, database, n){

  shared_terms <- top_modules_enrichment[[module]][[database]]

  # Rank the terms by the sum of both adjusted p-values and keep the n lowest
  ranked_terms <- arrange(mutate(shared_terms, pval_mean = p.adjust_ORA + p.adjust_GSEA), pval_mean)
  best_terms <- top_n(ranked_terms, -n, wt=pval_mean)

  # Columns shown in the annex table
  report_cols <- dplyr::select(dplyr::rename(best_terms, 'Description' = Description.x),
                               ID, Description, p.adjust_GSEA, p.adjust_ORA, NES, GeneRatio)

  # One display code per column, plus a leading one for the row names
  enriched_terms <- xtable(report_cols, display =c('s','s','s','e','e','f','s'))

  return(print(enriched_terms, include.rownames=FALSE))
}
#print_table_w_top_results(selected_modules_enrichment, names(selected_modules_enrichment)[2], 'DN', 5)
# SFARI Genes
SFARI_genes <- read_csv('./../../SFARI/Data/SFARI_genes_01-03-2020_w_ensembl_IDs.csv')

# Load Gandal dataset (the objects used below, e.g. datExpr, datGenes and genes_info, come from this file)
load('./../Data/preprocessedData/preprocessed_data.RData')
datExpr <- datExpr %>% data.frame

# WGCNA metrics
WGCNA_metrics <- read.csv('./../Data/preprocessedData/WGCNA_metrics.csv')

# Update genes_info with the gene symbols and the WGCNA metrics (clusters), dropping every p-value column.
# The SFARI table is joined first, but only the columns listed in the select() below are kept.
genes_info <- genes_info %>%
  left_join(SFARI_genes, by = 'ID') %>%
  left_join(datGenes %>% mutate(ID = rownames(.)) %>% dplyr::select(ID, hgnc_symbol), by = 'ID') %>%
  dplyr::select(ID, hgnc_symbol, log2FoldChange, shrunken_log2FoldChange, significant, Neuronal) %>%
  left_join(WGCNA_metrics, by = 'ID') %>%
  dplyr::select(-contains('pval'))

################################################################################################################
# Get the Entrez ID of the genes

# Genes assigned to a module (everything except the 'gray' module)
gene_names <- genes_info %>%
  dplyr::rename('ensembl_gene_id' = ID) %>%
  filter(Module!='gray')

# clusterProfiler works with Entrez Gene IDs, so we have to assign one to each gene
getinfo <- c('ensembl_gene_id','entrezgene')
mart <- useMart(biomart='ENSEMBL_MART_ENSEMBL',dataset='hsapiens_gene_ensembl',host='feb2014.archive.ensembl.org')
biomart_output <- getBM(attributes=getinfo, filters=c('ensembl_gene_id'),
                        values=gene_names$ensembl_gene_id, mart=mart)

# Final lookup table: Ensembl ID (as ID), Entrez ID and gene symbol
gene_names <- biomart_output %>%
  left_join(gene_names %>% dplyr::select(ensembl_gene_id, hgnc_symbol), by='ensembl_gene_id') %>%
  dplyr::rename('ID'=ensembl_gene_id)

# Remove the objects that are no longer needed
rm(getinfo, mart, biomart_output)
rm(dds, WGCNA_metrics)
Both GSEA and ORA are commonly used to study enrichment in sets of genes, but when using them for studying our modules both have shortcomings:
GSEA takes into consideration some ordering of the genes, in this case given by their Module Membership, which is correlated to the membership of genes to the module, but has two problems:
Being a continuous scale, it doesn’t separate by a threshold the genes that are truly in the cluster from the rest
The Module Membership metric is correlated with the real membership of the module, but this correlation is not perfect: a high MM doesn’t always mean the gene belongs to that module. For example, selecting a random module, in the plot below we can see the MM distribution of the genes belonging to that module against that of the genes belonging to other modules. Although, in general, genes belonging to the module have higher MM values, there is still a big overlap between the two groups, which makes MM a noisy metric for performing GSEA
# Example module: the first one found whose MTcor is above 0.9 in absolute value
module <- genes_info %>%
  filter(abs(MTcor) > 0.9) %>%
  slice_head(n=1) %>%
  pull(Module) %>%
  as.character

# Module Membership (MM) of every gene with respect to the example module, flagging the genes that
# belong to it. Genes from other modules are drawn almost transparent (alpha 0.1 instead of 0.8)
plot_data <- genes_info %>%
  dplyr::select(Module, paste0('MM.',gsub('#','',module))) %>%
  mutate(in_module = substring(Module,2) == gsub('#','',module),
         selected_module = paste('Cluster',
                                 genes_info$module_number[genes_info$Module==module][1] %>% as.character)) %>%
  mutate(alpha = ifelse(in_module, 0.8, 0.1))
colnames(plot_data)[2] <- 'MM'

# Horizontal jitter plot of MM, coloured by whether the gene belongs to the module
p <- plot_data %>%
  ggplot(aes(selected_module, MM, color = in_module)) +
  geom_jitter(alpha = plot_data$alpha) +
  xlab('') +
  ylab(paste('Cluster membership to cluster', genes_info$module_number[genes_info$Module==module][1])) +
  coord_flip() +
  theme_minimal() +
  theme(legend.position = 'bottom', axis.text.y = element_blank(), axis.ticks.y = element_blank()) +
  labs(color = paste('Gene belongs to cluster', genes_info$module_number[genes_info$Module==module][1]))

# Add the marginal density of MM for each of the two groups
ggExtra::ggMarginal(p, type = 'density', groupColour = TRUE, groupFill = TRUE, margins = 'x', size=1)

rm(modules, module, p, plot_data)
So perhaps it could be useful to use both methods together, since they seem to complement each other’s shortcomings very well, performing the enrichment using both methods and identifying the terms that are found to be enriched by both
Note: Since requiring enrichment in both methods is quite a strict restriction, we decided to relax the corrected p-value threshold (using Bonferroni correction) to 0.1.
Note: This script may take a while to run (~30 mins with an 8 core Intel(R) Core(TM) i5-8400H CPU @ 2.50GHz laptop), and sometimes there are problems with the API and it will freeze or kill the process, printing ‘error writing to connection’. Whenever this has happened, it has been fixed in less than a day (except once, when it took 4 days…).
# Modules to analyse, identified by their colour label
top_modules <- c('#44A0FF', '#D177FF', '#F47B5B', '#00BADE', '#64B200', '#DD71FA')

# Enrichment analysis of the top modules with GSEA and ORA, keeping the terms found by both methods.
# Every stage is cached on disk:
#   - if the final file (top_modules_enrichment.RData) exists, all the results are loaded from disk
#   - otherwise GSEA and ORA are computed module by module, saving after each module. When a previous
#     run was interrupted, the modules already stored in the partial file are reused and only the
#     missing ones are computed (a partial file used to be taken as complete, which left the remaining
#     modules without results and made the merge step below fail)
if(file.exists('./../Data/preprocessedData/top_modules_enrichment.RData')){

  load('./../Data/preprocessedData/top_modules_enrichment.RData')
  load('./../Data/preprocessedData/GSEA_results.RData')
  load('./../Data/preprocessedData/ORA_results.RData')

} else {

  ################################################################################################################
  # Prepare dataset for Enrichment Analysis

  EA_dataset <- genes_info %>% dplyr::rename('ensembl_gene_id' = ID) %>% filter(Module!='gray')

  # clusterProfiler works with Entrez Gene IDs, so we have to assign one to each gene
  # NOTE(review): biomaRt may return no Entrez ID, or several, for an Ensembl ID -- confirm that
  # missing/duplicated IDs are acceptable as gene list names below
  getinfo <- c('ensembl_gene_id','entrezgene')
  mart <- useMart(biomart='ENSEMBL_MART_ENSEMBL',dataset='hsapiens_gene_ensembl',host='feb2014.archive.ensembl.org')
  biomart_output <- getBM(attributes=getinfo, filters=c('ensembl_gene_id'),
                          values=EA_dataset$ensembl_gene_id, mart=mart)

  EA_dataset <- biomart_output %>% left_join(EA_dataset, by='ensembl_gene_id') %>% dplyr::rename('ID'=ensembl_gene_id)

  rm(getinfo, mart, biomart_output)

  ################################################################################################################
  # GSEA enrichment

  file_name <- './../Data/preprocessedData/GSEA_results.RData'

  # Resume from the checkpoint if there is one
  if(file.exists(file_name)){
    load(file_name)
  } else {
    GSEA_enrichment <- list()
  }
  pending_modules <- setdiff(top_modules, names(GSEA_enrichment))

  if(length(pending_modules) > 0){

    nPerm <- 1e5
    GSEA_dataset <- EA_dataset %>% dplyr::select(ID, entrezgene, contains('MM.'))

    for(module in pending_modules){

      cat(paste0('\nModule: ', which(top_modules == module), '/', length(top_modules)))

      # Genes ranked by their Module Membership to this module, named by Entrez ID
      geneList <- GSEA_dataset %>% pull(paste0('MM.',substring(module,2)))
      names(geneList) <- GSEA_dataset %>% pull(entrezgene) %>% as.character
      geneList <- sort(geneList, decreasing = TRUE)

      GSEA_GO <- gseGO(geneList, OrgDb = org.Hs.eg.db, pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1,
                       nPerm = nPerm, verbose = FALSE, seed = TRUE)

      GSEA_DO <- gseDO(geneList, pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1,
                       nPerm = nPerm, verbose = FALSE, seed = TRUE)

      GSEA_DGN <- gseDGN(geneList, pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1,
                         nPerm = nPerm, verbose = FALSE, seed = TRUE)

      GSEA_KEGG <- gseKEGG(geneList, organism = 'human', pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1,
                           nPerm = nPerm, verbose = FALSE, seed = TRUE)

      GSEA_Reactome <- gsePathway(geneList, organism = 'human', pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1,
                                  nPerm = nPerm, verbose = FALSE, seed = TRUE)

      GSEA_enrichment[[module]] <- list('GO' = GSEA_GO, 'DO' = GSEA_DO, 'DGN' = GSEA_DGN, 'KEGG' = GSEA_KEGG,
                                        'Reactome' = GSEA_Reactome)

      # Save after each iteration (in case it breaks)
      save(GSEA_enrichment, file = file_name)
    }

    rm(GSEA_dataset, nPerm, geneList, GSEA_GO, GSEA_DO, GSEA_DGN, GSEA_KEGG, GSEA_Reactome)
  }

  ################################################################################################################
  # ORA enrichment

  file_name <- './../Data/preprocessedData/ORA_results.RData'

  # Resume from the checkpoint if there is one
  if(file.exists(file_name)){
    load(file_name)
  } else {
    ORA_enrichment <- list()
  }
  pending_modules <- setdiff(top_modules, names(ORA_enrichment))

  if(length(pending_modules) > 0){

    # Prepare input: every gene assigned to a module is part of the background
    universe <- EA_dataset$entrezgene %>% as.character

    # Perform Enrichment
    for(module in pending_modules){

      genes_in_module <- EA_dataset %>% filter(Module == module) %>% pull(entrezgene)

      ORA_GO <- enrichGO(gene = genes_in_module, universe = universe, OrgDb = org.Hs.eg.db, ont = 'All',
                         pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1, qvalueCutoff = 1)

      ORA_DO <- enrichDO(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
                         pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1)

      ORA_DGN <- enrichDGN(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
                           pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1)

      ORA_KEGG <- enrichKEGG(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
                             pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1)

      ORA_Reactome <- enrichPathway(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
                                    pAdjustMethod = 'bonferroni', pvalueCutoff = 0.1)

      ORA_enrichment[[module]] <- list('GO' = ORA_GO, 'DO' = ORA_DO, 'DGN' = ORA_DGN, 'KEGG' = ORA_KEGG,
                                       'Reactome' = ORA_Reactome)

      # Save after each iteration
      save(ORA_enrichment, file = file_name)
    }

    rm(universe, genes_in_module, module, ORA_GO, ORA_DGN, ORA_DO, ORA_KEGG, ORA_Reactome)
  }

  ################################################################################################################
  # Get shared enrichment for each module

  top_modules_enrichment <- list()

  for(module in top_modules){

    module_enrichment <- list()
    GSEA_enrichment_for_module <- GSEA_enrichment[[module]]
    ORA_enrichment_for_module <- ORA_enrichment[[module]]

    for(dataset in c('KEGG', 'Reactome', 'GO', 'DO', 'DGN')){

      # Tag the p-value columns with the method they come from before joining
      GSEA_enrichment_dataset <- GSEA_enrichment_for_module[[dataset]] %>% data.frame %>%
        dplyr::rename('pvalue_GSEA' = pvalue, 'p.adjust_GSEA' = p.adjust, 'qvalues_GSEA' = qvalues)

      ORA_enrichment_dataset <- ORA_enrichment_for_module[[dataset]] %>% data.frame %>%
        dplyr::rename('pvalue_ORA' = pvalue, 'p.adjust_ORA' = p.adjust, 'qvalue_ORA' = qvalue)

      # Get shared enrichments (if any)
      shared_enrichment_dataset <- GSEA_enrichment_dataset %>% inner_join(ORA_enrichment_dataset, by = 'ID')

      module_enrichment[[dataset]] <- shared_enrichment_dataset
    }

    top_modules_enrichment[[module]] <- module_enrichment
  }

  save(top_modules_enrichment, file = './../Data/preprocessedData/top_modules_enrichment.RData')

  rm(module, module_enrichment, GSEA_enrichment_for_module, ORA_enrichment_for_module, dataset,
     GSEA_enrichment_dataset, ORA_enrichment_dataset, shared_enrichment_dataset, pending_modules)
}
# The report below is restricted to the first three modules of top_modules
# (presumably the ones selected by their MTcor, going by the variable name -- confirm)
top_modules_mtcor = top_modules[1:3]
# Print the comparison between GSEA and ORA for the 'GO' results of those modules
compare_methods(GSEA_enrichment, ORA_enrichment, top_modules_enrichment, top_modules_mtcor, 'GO')
Enrichment results for cluster 20:
- GSEA has 36 enriched term(s)
- ORA has 1 enriched term(s)
- 0 terms are enriched in both methods
Enrichment results for cluster 36:
- GSEA has 316 enriched term(s)
- ORA has 39 enriched term(s)
- 18 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| GO:0048514 | blood vessel morphogenesis | 0.0000006 | 0.0805837 | 0.0000004 | 50/517 |
| GO:0042060 | wound healing | 0.0000363 | 0.0813934 | 0.0000048 | 44/517 |
| GO:0001525 | angiogenesis | 0.0000023 | 0.0822846 | 0.0000007 | 44/517 |
| GO:0006333 | chromatin assembly or disassembly | 0.0016454 | 0.0922041 | 0.0001126 | 21/517 |
| GO:0031497 | chromatin assembly | 0.0004459 | 0.0935875 | 0.0000397 | 20/517 |
| GO:0034728 | nucleosome organization | 0.0014996 | 0.0927348 | 0.0001112 | 20/517 |
| GO:0006334 | nucleosome assembly | 0.0000482 | 0.0946569 | 0.0000048 | 20/517 |
| GO:0030219 | megakaryocyte differentiation | 0.0008059 | 0.0966219 | 0.0000652 | 16/517 |
| GO:0009617 | response to bacterium | 0.0230690 | 0.0824462 | 0.0011402 | 35/517 |
| GO:0006323 | DNA packaging | 0.0162955 | 0.0914236 | 0.0009061 | 20/517 |
| GO:0050865 | regulation of cell activation | 0.0275775 | 0.0816087 | 0.0012913 | 37/517 |
| GO:0045652 | regulation of megakaryocyte differentiation | 0.0130287 | 0.0979723 | 0.0007727 | 13/517 |
| GO:1903706 | regulation of hemopoiesis | 0.0427370 | 0.0826343 | 0.0019010 | 34/517 |
| GO:0030099 | myeloid cell differentiation | 0.0465264 | 0.0839342 | 0.0019710 | 31/517 |
| GO:0060964 | regulation of gene silencing by miRNA | 0.0594324 | 0.0949961 | 0.0022988 | 15/517 |
| GO:1904035 | regulation of epithelial cell apoptotic process | 0.0577392 | 0.0992782 | 0.0022988 | 11/517 |
| GO:0060147 | regulation of posttranscriptional gene silencing | 0.0740821 | 0.0949420 | 0.0025348 | 15/517 |
| GO:0060966 | regulation of gene silencing by RNA | 0.0740821 | 0.0949420 | 0.0025348 | 15/517 |
Genes involved in top 5 enriched terms: ABL1, ACP5, ADAM8, ANPEP, ATP11C, B4GALT1, BANK1, BIN3, BTG1, C5AR1, CCL2, CD274, CD55, CDK19, CDKN1A, CELSR1, CFLAR, CLCF1, COL4A1, COL4A2, COL8A2, CPQ, CTGF, CUBN, CXCL2, CYR61, DOCK6, EIF2AK3, EMILIN1, ENPP3, EPPK1, ESM1, EYA1, FOS, GCLC, GRAP2, GSN, HBEGF, HIF3A, HIST1H2BC, HIST1H2BE, HIST1H2BF, HIST1H2BG, HIST1H2BI, HIST1H3A, HIST1H3B, HIST1H3C, HIST1H3D, HIST1H3E, HIST1H3F, HIST1H3H, HIST1H3I, HIST1H3J, HSPB1, IFRD1, IL6, IL7, IL7R, IL8, INHBA, ITGA5, JUNB, LAMA5, LEMD3, LOXL3, LRP5, LYST, MAFF, MAFK, MAPK7, MED1, MMP14, MMP2, MT2A, MTDH, MYH9, MYLK, MYOF, MYOZ1, MZB1, NOD2, NOTCH3, NR4A1, PDE4B, PELI1, PHLDB2, PLAU, PLAUR, PNP, PRRX1, PTGER4, PTGS2, PTPN22, RAPGEF2, RBM15, RCOR1, RELA, RIPK2, RIPK3, RREB1, RUNX3, SAT1, SEMA5A, SERPINE1, SGMS1, SH2B3, SH2D2A, SHB, SLC7A11, SPHK1, TAL1, TFPI2, TGFBI, THBS1, TIGIT, TIMP1, TLN1, TNFRSF12A, TNFSF8, TRAF6, UNC13B, VASH1, ZC3H12A
Genes involved in all enriched terms: ABL1, ACP5, ADAM8, AGO4, ANPEP, ATP11C, B4GALT1, BANK1, BIN3, BTG1, C5AR1, CCL2, CD248, CD274, CD55, CDK19, CDKN1A, CELSR1, CFLAR, CLCF1, COL4A1, COL4A2, COL8A2, CPQ, CTGF, CUBN, CXCL2, CYR61, DDX5, DOCK6, DYRK3, EIF2AK3, EMILIN1, ENPP3, EPPK1, ESM1, EYA1, FOS, FSTL3, GCLC, GRAP2, GSN, HAT1, HBEGF, HIF3A, HIST1H1D, HIST1H2BC, HIST1H2BE, HIST1H2BF, HIST1H2BG, HIST1H2BI, HIST1H3A, HIST1H3B, HIST1H3C, HIST1H3D, HIST1H3E, HIST1H3F, HIST1H3H, HIST1H3I, HIST1H3J, HP1BP3, HSPB1, IFRD1, IL6, IL7, IL7R, IL8, INHBA, ITGA5, JUNB, KAT6A, LAMA5, LEMD3, LIF, LOXL3, LRP5, LTBR, LYST, MAFF, MAFK, MAPK7, MED1, MMP14, MMP2, MT2A, MTDH, MYH9, MYLK, MYOF, MYOZ1, MZB1, NOD2, NOTCH3, NR4A1, NRROS, NUP188, NUP98, NUPR1, OSM, PDE4B, PELI1, PHLDB2, PLAU, PLAUR, PNP, PRRX1, PSMC1, PTGER4, PTGS2, PTPN22, RANBP2, RAPGEF2, RBM15, RCOR1, RELA, RIPK2, RIPK3, RREB1, RUNX3, SAT1, SEMA5A, SERPINE1, SGMS1, SH2B3, SH2D2A, SHB, SLC7A11, SOX6, SPHK1, SPTY2D1, TAL1, TCF12, TCIRG1, TET2, TFPI2, TGFBI, THBS1, TIGIT, TIMP1, TLN1, TMOD3, TNFRSF12A, TNFSF8, TRAF6, UNC13B, VASH1, ZC3H12A
Enrichment results for cluster 45:
- GSEA has 0 enriched term(s)
- ORA has 73 enriched term(s)
- 0 terms are enriched in both methods
Plots of the results when there are more than 5 terms in common between methods:
# Interactive scatter plots (adjusted p-value of GSEA vs ORA) of the shared 'GO' terms
plot_results(top_modules_enrichment, top_modules_mtcor, 'GO')
# Fraction of genes shared between the top shared 'GO' terms of each module
plot_shared_genes(top_modules_enrichment, top_modules_mtcor, 'GO')
# Print the comparison between GSEA and ORA for the 'DO' results
compare_methods(GSEA_enrichment, ORA_enrichment, top_modules_enrichment, top_modules_mtcor, 'DO')
Enrichment results for cluster 20:
- GSEA has 77 enriched term(s)
- ORA has 495 enriched term(s)
- 0 terms are enriched in both methods
Enrichment results for cluster 36:
- GSEA has 206 enriched term(s)
- ORA has 670 enriched term(s)
- 15 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| DOID:7148 | rheumatoid arthritis | 0.0000009 | 0.0110111 | 0.0000006 | 48/284 |
| DOID:2394 | ovarian cancer | 0.0008573 | 0.0116314 | 0.0001688 | 31/284 |
| DOID:289 | endometriosis | 0.0000344 | 0.0131491 | 0.0000116 | 16/284 |
| DOID:2151 | malignant ovarian surface epithelial-stromal neoplasm | 0.0014993 | 0.0117612 | 0.0001688 | 29/284 |
| DOID:2152 | ovary epithelial cancer | 0.0014993 | 0.0117612 | 0.0001688 | 29/284 |
| DOID:4001 | ovarian carcinoma | 0.0014993 | 0.0117612 | 0.0001688 | 29/284 |
| DOID:120 | female reproductive organ cancer | 0.0137553 | 0.0111429 | 0.0013275 | 36/284 |
| DOID:552 | pneumonia | 0.0213572 | 0.0130622 | 0.0017746 | 13/284 |
| DOID:170 | endocrine gland cancer | 0.0365661 | 0.0109738 | 0.0019002 | 38/284 |
| DOID:3070 | malignant glioma | 0.0236419 | 0.0242302 | 0.0017746 | 22/284 |
| DOID:345 | uterine disease | 0.0316550 | 0.0272338 | 0.0019002 | 9/284 |
| DOID:229 | female reproductive system disease | 0.0359228 | 0.0248975 | 0.0019002 | 18/284 |
| DOID:288 | endometriosis of uterus | 0.0410068 | 0.0288509 | 0.0019788 | 5/284 |
| DOID:3905 | lung carcinoma | 0.0703615 | 0.0109932 | 0.0031689 | 37/284 |
| DOID:5041 | esophageal cancer | 0.0926079 | 0.0125777 | 0.0039102 | 16/284 |
Genes involved in top 5 enriched terms: ABCC3, ACP5, ADAM8, ANPEP, B4GALT1, BANK1, C5AR1, CASP4, CCL2, CCNL1, CD274, CD55, CDKN1A, CFLAR, CTGF, CTSK, CXCL1, CXCL14, CYR61, DNM2, EFEMP2, EIF4EBP3, ERC1, FOS, FOSL1, GRAP2, GSN, HBEGF, HSPB1, IKBKE, IL1RN, IL6, IL7, IL7R, IL8, INHBA, ITGA5, KLF6, KLRC1, LATS2, LHCGR, LIF, LRP5, LTBR, MAP3K2, MCL1, MED1, MFF, MMP14, MMP2, MTDH, MTRR, NAMPT, NFATC1, NOD2, NOTCH3, NUPR1, OSM, PAX8, PLA2G4A, PLAU, PLAUR, PNP, PTGER4, PTGS2, PTPN13, PTPN22, PTPRJ, PXN, RASSF1, RCVRN, RELA, RUNX3, SAT1, SEMA3C, SEMA5A, SERPINE1, SH2D2A, SPHK1, TFPI2, TGFBI, THBS1, TIAM1, TIMP1, TNFRSF10B, TNFRSF10D, TNFRSF12A, TNFSF8, U2AF1, VEGFC, YBX1
Genes involved in all enriched terms: ABCC3, ACP5, ADAM8, AHRR, ANPEP, AXIN1, B4GALT1, BANK1, C5AR1, CASP4, CCL2, CCNL1, CD274, CD55, CDKN1A, CFLAR, CTGF, CTSK, CXCL1, CXCL14, CYR61, DNM2, EFEMP2, EIF4EBP3, ERC1, FOS, FOSL1, FSTL3, GRAP2, GSN, HBEGF, HSPB1, IKBKE, IL1RN, IL6, IL7, IL7R, IL8, INHBA, ITGA5, KLF6, KLRC1, LAMA5, LAMC1, LATS2, LHCGR, LIF, LRP5, LTBR, MAP3K2, MCL1, MED1, MFF, MMP14, MMP2, MT2A, MTDH, MTRR, MYLK, NAMPT, NFATC1, NOD2, NOTCH3, NUPR1, OSM, PAX8, PLA2G4A, PLAU, PLAUR, PNP, PTGER4, PTGS2, PTPN13, PTPN22, PTPRJ, PXN, RASSF1, RCVRN, RELA, RFX1, RUNX3, SAT1, SEMA3C, SEMA5A, SERPINE1, SH2D2A, SPHK1, TFPI2, TGFBI, THBS1, TIAM1, TIMP1, TNFRSF10B, TNFRSF10D, TNFRSF12A, TNFSF8, U2AF1, VEGFC, YBX1
Enrichment results for cluster 45:
- GSEA has 237 enriched term(s)
- ORA has 417 enriched term(s)
- 1 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| DOID:0060100 | musculoskeletal system cancer | 0.0341421 | 0.0145544 | 0.0146455 | 12/54 |
Genes involved in all enriched terms: ACKR3, AR, BGLAP, CD163, CDH3, FOXO1, GSTP1, ICAM1, MAP3K5, SERPINF1, YAP1, ZIC1
Plots of the results when there are more than 5 terms in common between methods:
# Interactive scatter plots (adjusted p-value of GSEA vs ORA) of the shared 'DO' terms
plot_results(top_modules_enrichment, top_modules_mtcor, 'DO')
# Fraction of genes shared between the top shared 'DO' terms of each module
plot_shared_genes(top_modules_enrichment, top_modules_mtcor, 'DO')
# Print the comparison between GSEA and ORA for the 'DGN' results
compare_methods(GSEA_enrichment, ORA_enrichment, top_modules_enrichment, top_modules_mtcor, 'DGN')
Enrichment results for cluster 20:
- GSEA has 40 enriched term(s)
- ORA has 1957 enriched term(s)
- 0 terms are enriched in both methods
Enrichment results for cluster 36:
- GSEA has 239 enriched term(s)
- ORA has 2566 enriched term(s)
- 20 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| umls:C0003864 | Arthritis | 0.0010858 | 0.0482080 | 0.0001500 | 40/498 |
| umls:C0032285 | Pneumonia | 0.0000204 | 0.0500942 | 0.0000085 | 36/498 |
| umls:C3714514 | Infection | 0.0033328 | 0.0475979 | 0.0002893 | 41/498 |
| umls:C0024115 | Lung diseases | 0.0003938 | 0.0506939 | 0.0000816 | 31/498 |
| umls:C1519670 | Tumor Angiogenesis | 0.0022508 | 0.0492427 | 0.0002331 | 35/498 |
| umls:C3495559 | Juvenile arthritis | 0.0009317 | 0.0508547 | 0.0001500 | 30/498 |
| umls:C0015944 | Fetal Membranes, Premature Rupture | 0.0022080 | 0.0538436 | 0.0002331 | 21/498 |
| umls:C0333307 | Superficial ulcer | 0.0001155 | 0.0561457 | 0.0000319 | 18/498 |
| umls:C2919032 | Infection of amniotic sac and membranes, unspecified, unspecified trimester, not applicable or unspecified | 0.0046841 | 0.0539614 | 0.0003235 | 20/498 |
| umls:C2986658 | Diffuse Intrinsic Pontine Glioma | 0.0000000 | 0.0618375 | 0.0000000 | 11/498 |
| umls:C0013537 | Eclampsia | 0.0089283 | 0.0542816 | 0.0005679 | 19/498 |
| umls:C0008495 | Chorioamnionitis | 0.0095944 | 0.0537997 | 0.0005679 | 20/498 |
| umls:C0011644 | Scleroderma | 0.0123621 | 0.0546344 | 0.0006829 | 18/498 |
| umls:C0022876 | Premature Obstetric Labor | 0.0187841 | 0.0535026 | 0.0009728 | 20/498 |
| umls:C0003486 | Aortic Aneurysm | 0.0247327 | 0.0534035 | 0.0011386 | 20/498 |
| umls:C0042133 | Uterine Fibroids | 0.0369282 | 0.0506654 | 0.0016106 | 27/498 |
| umls:C0042373 | Vascular Diseases | 0.0434200 | 0.0497743 | 0.0017133 | 30/498 |
| umls:C0151526 | Premature Birth | 0.0549919 | 0.0516364 | 0.0018228 | 24/498 |
| umls:C0004623 | Bacterial Infections | 0.0814674 | 0.0530878 | 0.0024110 | 20/498 |
| umls:C1335302 | Pancreatic Ductal Adenocarcinoma | 0.0996878 | 0.0476237 | 0.0027848 | 37/498 |
Genes involved in top 5 enriched terms: ABL1, ACP5, ADAM8, ANPEP, AOC2, APOBEC3C, APOBEC3F, BIRC3, BTN3A2, BTN3A3, C5AR1, CCL2, CD248, CD274, CD55, CDKN1A, CFB, CHI3L2, COL4A1, CTGF, CTSK, CXCL1, CYP4Z1, CYR61, DNM2, DOCK6, DUSP2, DUSP5, EIF2AK3, ELF1, ELF4, EPHA3, ESM1, FBXO32, FCGRT, FLVCR2, FNDC3A, FOS, FOSB, FOXL1, GBP1, GRAP2, HBEGF, HSPB1, IFNGR2, IKBKE, IL1RN, IL6, IL7, IL8, ITGA5, JUNB, KLF6, KLRC1, LAMA5, LIF, LIPC, LTBR, MAPK7, MKNK2, MMP14, MMP2, MTDH, MTRR, MYLK, NAMPT, NFATC1, NOD2, NOTCH3, NR4A1, NR4A2, NUPR1, OSM, PLA2G4A, PLAU, PLAUR, PTBP1, PTGS2, PTPN22, PTPRJ, PXN, RASSF1, RELA, RGL2, RIPK2, RNF19A, RUNX3, SERPINE1, SH2B3, SIGLEC7, SLC16A3, SPHK1, TFPI2, THBS1, TIMP1, TNFAIP6, TNFRSF10B, TNFRSF10D, TNNC1, TRAF6, TTLL4, VASH1, VEGFC, YBX1, ZHX2, ZNRD1
Genes involved in all enriched terms: ABCC3, ABL1, ACP5, ADAM8, AFF4, ANPEP, AOC2, APOBEC3C, APOBEC3F, BANK1, BIRC3, BMP1, BTG1, BTG3, BTN3A2, BTN3A3, C2orf88, C5AR1, CALCR, CCL2, CD248, CD274, CD55, CDK19, CDKN1A, CFB, CFLAR, CHI3L2, COL4A1, COL4A2, CREM, CTGF, CTSK, CXCL1, CXCL14, CXCL2, CYP4Z1, CYR61, DNM2, DOCK6, DUSP2, DUSP5, EIF2AK3, ELF1, ELF4, EMP1, EPHA3, EPHX3, ESM1, FBXO32, FCGRT, FLVCR2, FNDC3A, FOS, FOSB, FOSL1, FOXL1, FSTL3, FUT7, GAL3ST1, GBP1, GCLC, GEM, GHR, GPRC5A, GRAP2, GSN, HBEGF, HIST1H2BC, HIST1H2BE, HIST1H2BF, HIST1H2BG, HIST1H2BI, HIST1H3A, HIST1H3B, HIST1H3C, HIST1H3D, HIST1H3E, HIST1H3F, HIST1H3H, HIST1H3I, HIST1H3J, HSPB1, IFNGR2, IFRD1, IKBKE, IL1RN, IL6, IL7, IL7R, IL8, INHBA, INTS12, ITGA5, JUNB, KAT6A, KCNQ1, KLF6, KLRC1, LAMA5, LHCGR, LIF, LIPC, LRP5, LSP1, LTBP1, LTBR, MAFF, MAPK7, MCL1, MKNK2, MMP14, MMP2, MTDH, MTRR, MYLK, NAMPT, NFATC1, NOD2, NOTCH3, NPW, NR4A1, NR4A2, NUPR1, OSM, PACSIN2, PARD3, PDE7A, PLA2G4A, PLAU, PLAUR, PTBP1, PTGER4, PTGS2, PTPN22, PTPRJ, PXN, RASSF1, RELA, RGL2, RIPK2, RIPK3, RNF19A, RUNX3, SERPINE1, SH2B3, SH2D2A, SIGLEC7, SLC16A3, SPHK1, TFPI2, TGFBI, THBS1, TIMP1, TLN1, TNFAIP6, TNFRSF10B, TNFRSF10D, TNNC1, TRAF6, TTLL4, VASH1, VASP, VEGFC, YBX1, ZC3H12A, ZHX2, ZNRD1
Enrichment results for cluster 45:
- GSEA has 315 enriched term(s)
- ORA has 1470 enriched term(s)
- 8 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| umls:C0036421 | Systemic Scleroderma | 0.0000000 | 0.0633648 | 0.0000000 | 24/107 |
| umls:C0008626 | Congenital chromosomal disease | 0.0000000 | 0.0634023 | 0.0000000 | 22/107 |
| umls:C0346429 | Multiple malignancy | 0.0000000 | 0.0655068 | 0.0000000 | 18/107 |
| umls:C0206659 | Embryonal Carcinoma | 0.0000000 | 0.0661103 | 0.0000000 | 15/107 |
| umls:C0278504 | Non-small cell lung cancer stage I | 0.0000000 | 0.0667783 | 0.0000000 | 15/107 |
| umls:C0238288 | Muscular Dystrophy, Facioscapulohumeral | 0.0000000 | 0.0667991 | 0.0000000 | 16/107 |
| umls:C1449563 | Cardiomyopathy, Familial Idiopathic | 0.0014829 | 0.0655606 | 0.0001392 | 10/107 |
| umls:C0000786 | Spontaneous abortion | 0.0515450 | 0.0654930 | 0.0036295 | 9/107 |
Genes involved in top 5 enriched terms: ACD, ACKR3, ANTXR1, AR, CD163, CLIC1, CYBRD1, EPHX1, GSTO2, GSTP1, HFE, HIST1H4A, HIST1H4B, HIST1H4C, HIST1H4D, HIST1H4E, HIST1H4F, HIST1H4H, HIST1H4I, HIST1H4J, HIST1H4K, HIST1H4L, HIST4H4, HLA-DQB2, HLA-DRB5, HSPB7, ICAM1, IL1R1, IRF1, KRT18, MYBPC3, SERPINF1, TIMP3, TNFAIP3, TNFRSF4, TNFSF4, WNT3, YAP1
Genes involved in all enriched terms: ACD, ACKR3, ANTXR1, AR, CD163, CLIC1, CYBRD1, EPHX1, GSTO2, GSTP1, HFE, HIST1H4A, HIST1H4B, HIST1H4C, HIST1H4D, HIST1H4E, HIST1H4F, HIST1H4H, HIST1H4I, HIST1H4J, HIST1H4K, HIST1H4L, HIST4H4, HLA-DQB2, HLA-DRB5, HSPB7, ICAM1, IL1R1, IRF1, KRT18, MYBPC3, SERPINF1, TIMP3, TNFAIP3, TNFRSF4, TNFSF4, WNT3, YAP1
Plots of the results when there are more than 5 terms in common between methods:
# Interactive scatter plots (adjusted p-value of GSEA vs ORA) of the shared 'DGN' terms
plot_results(top_modules_enrichment, top_modules_mtcor, 'DGN')
# Fraction of genes shared between the top shared 'DGN' terms of each module
plot_shared_genes(top_modules_enrichment, top_modules_mtcor, 'DGN')
# Print the comparison between GSEA and ORA for the 'KEGG' results
compare_methods(GSEA_enrichment, ORA_enrichment, top_modules_enrichment, top_modules_mtcor, 'KEGG')
Enrichment results for cluster 20:
- GSEA has 44 enriched term(s)
- ORA has 268 enriched term(s)
- 0 terms are enriched in both methods
Enrichment results for cluster 36:
- GSEA has 66 enriched term(s)
- ORA has 272 enriched term(s)
- 10 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| hsa04613 | Neutrophil extracellular trap formation | 0.0008962 | 0.0051567 | 0.0002763 | 21/272 |
| hsa04060 | Cytokine-cytokine receptor interaction | 0.0019268 | 0.0051355 | 0.0003895 | 21/272 |
| hsa05322 | Systemic lupus erythematosus | 0.0021056 | 0.0053556 | 0.0003895 | 16/272 |
| hsa05202 | Transcriptional misregulation in cancer | 0.0029734 | 0.0051574 | 0.0004584 | 20/272 |
| hsa04668 | TNF signaling pathway | 0.0002317 | 0.0107648 | 0.0001072 | 17/272 |
| hsa04621 | NOD-like receptor signaling pathway | 0.0133683 | 0.0051923 | 0.0017664 | 18/272 |
| hsa05034 | Alcoholism | 0.0392455 | 0.0051572 | 0.0045374 | 18/272 |
| hsa04061 | Viral protein interaction with cytokine and cytokine receptor | 0.0647984 | 0.0056390 | 0.0061440 | 9/272 |
| hsa04064 | NF-kappa B signaling pathway | 0.0733941 | 0.0054350 | 0.0061713 | 12/272 |
| hsa05131 | Shigellosis | 0.0664269 | 0.0149450 | 0.0061440 | 22/272 |
Genes involved in top 5 enriched terms: ANTXR2, BIRC3, C5AR1, CASP4, CCL2, CDKN1A, CLCF1, CTF1, CXCL1, CXCL14, CXCL2, DDX5, EYA1, FOSB, GBP1, GBP2, GHR, HAT1, HIST1H2BC, HIST1H2BD, HIST1H2BE, HIST1H2BF, HIST1H2BG, HIST1H2BI, HIST1H3A, HIST1H3B, HIST1H3C, HIST1H3D, HIST1H3E, HIST1H3F, HIST1H3H, HIST1H3I, HIST1H3J, IFNGR2, IKBKE, IL1RN, IL6, IL7, IL7R, IL8, INHBA, LIF, LTBR, MAP3K7, NAMPT, NOD2, NUPR1, OSM, PAX8, PLAU, RELA, RIPK2, RIPK3, TNFRSF10B, TNFRSF10D, TNFRSF12A, TNFSF8, TRAF6
Genes involved in all enriched terms: ANTXR2, BIRC3, C5AR1, CASP4, CCL2, CDKN1A, CFLAR, CLCF1, CTF1, CXCL1, CXCL14, CXCL2, DDX5, ERC1, EYA1, FNBP1L, FOS, FOSB, GBP1, GBP2, GHR, HAT1, HIST1H2BC, HIST1H2BD, HIST1H2BE, HIST1H2BF, HIST1H2BG, HIST1H2BI, HIST1H3A, HIST1H3B, HIST1H3C, HIST1H3D, HIST1H3E, HIST1H3F, HIST1H3H, HIST1H3I, HIST1H3J, IFNGR2, IKBKE, IL1RN, IL6, IL7, IL7R, IL8, INHBA, ITGA5, JUNB, LIF, LTBR, MAP3K7, MLKL, MMP14, NAMPT, NOD2, NUPR1, OSM, PAX8, PLAU, PLCE1, PTGS2, PXN, RELA, RIPK2, RIPK3, TLN1, TNFRSF10B, TNFRSF10D, TNFRSF12A, TNFSF8, TRAF6, U2AF1, VEGFC
Enrichment results for cluster 45:
- GSEA has 64 enriched term(s)
- ORA has 131 enriched term(s)
- 4 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| hsa05203 | Viral carcinogenesis | 2e-07 | 0.0062888 | 0 | 14/63 |
| hsa05034 | Alcoholism | 0e+00 | 0.0063023 | 0 | 14/63 |
| hsa04613 | Neutrophil extracellular trap formation | 0e+00 | 0.0063053 | 0 | 15/63 |
| hsa05322 | Systemic lupus erythematosus | 0e+00 | 0.0063675 | 0 | 15/63 |
Plots of the results when there are more than 5 terms in common between methods:
# KEGG result plots for the modules in top_modules_mtcor.
# NOTE(review): plot_results and plot_shared_genes are defined outside this section; going by the
# accompanying text they presumably only draw modules with more than 5 shared terms -- confirm.
plot_results(top_modules_enrichment, top_modules_mtcor, 'KEGG')
plot_shared_genes(top_modules_enrichment, top_modules_mtcor, 'KEGG')
# GSEA vs ORA comparison on the Reactome results for the same modules. For each module,
# compare_methods prints how many terms each method reports, how many are reported by both,
# and (when there is at least one) a table of the shared terms ordered by the sum of the two
# adjusted p-values.
compare_methods(GSEA_enrichment, ORA_enrichment, top_modules_enrichment, top_modules_mtcor, 'Reactome')
Enrichment results for cluster 20:
- GSEA has 89 enriched term(s)
- ORA has 729 enriched term(s)
- 0 terms are enriched in both methods
Enrichment results for cluster 36:
- GSEA has 197 enriched term(s)
- ORA has 888 enriched term(s)
- 50 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| R-HSA-449147 | Signaling by Interleukins | 0.0000124 | 0.0190427 | 0.0000005 | 39/332 |
| R-HSA-9006931 | Signaling by Nuclear Receptors | 0.0002917 | 0.0201304 | 0.0000068 | 27/332 |
| R-HSA-8878171 | Transcriptional regulation by RUNX1 | 0.0000998 | 0.0203808 | 0.0000030 | 26/332 |
| R-HSA-8939211 | ESR-mediated signaling | 0.0000310 | 0.0205241 | 0.0000011 | 26/332 |
| R-HSA-2559583 | Cellular Senescence | 0.0000557 | 0.0207968 | 0.0000018 | 24/332 |
| R-HSA-9018519 | Estrogen-dependent gene expression | 0.0000062 | 0.0213593 | 0.0000003 | 22/332 |
| R-HSA-68875 | Mitotic Prophase | 0.0000189 | 0.0214390 | 0.0000007 | 21/332 |
| R-HSA-3214847 | HATs acetylate histones | 0.0001080 | 0.0214177 | 0.0000031 | 20/332 |
| R-HSA-211000 | Gene Silencing by RNA | 0.0000071 | 0.0215555 | 0.0000004 | 21/332 |
| R-HSA-1474165 | Reproduction | 0.0000674 | 0.0216157 | 0.0000021 | 19/332 |
| R-HSA-8939236 | RUNX1 regulates transcription of genes involved in differentiation of HSCs | 0.0000106 | 0.0216828 | 0.0000005 | 20/332 |
| R-HSA-1500620 | Meiosis | 0.0000423 | 0.0218768 | 0.0000014 | 18/332 |
| R-HSA-2559580 | Oxidative Stress Induced Senescence | 0.0002046 | 0.0217152 | 0.0000051 | 18/332 |
| R-HSA-2559582 | Senescence-Associated Secretory Phenotype (SASP) | 0.0000000 | 0.0219327 | 0.0000000 | 22/332 |
| R-HSA-1912422 | Pre-NOTCH Expression and Processing | 0.0000050 | 0.0219327 | 0.0000003 | 19/332 |
| R-HSA-5578749 | Transcriptional regulation by small RNAs | 0.0000030 | 0.0219759 | 0.0000002 | 19/332 |
| R-HSA-427413 | NoRC negatively regulates rRNA expression | 0.0001142 | 0.0219759 | 0.0000031 | 17/332 |
| R-HSA-5250913 | Positive epigenetic regulation of rRNA expression | 0.0001142 | 0.0219759 | 0.0000031 | 17/332 |
| R-HSA-977225 | Amyloid fiber formation | 0.0000048 | 0.0221053 | 0.0000003 | 18/332 |
| R-HSA-5250941 | Negative epigenetic regulation of rRNA expression | 0.0001783 | 0.0219327 | 0.0000047 | 17/332 |
| R-HSA-73864 | RNA Polymerase I Transcription | 0.0002377 | 0.0218768 | 0.0000057 | 17/332 |
| R-HSA-73854 | RNA Polymerase I Promoter Clearance | 0.0002061 | 0.0219158 | 0.0000051 | 17/332 |
| R-HSA-3214815 | HDACs deacetylate histones | 0.0000189 | 0.0222123 | 0.0000007 | 17/332 |
| R-HSA-1912408 | Pre-NOTCH Transcription and Translation | 0.0000019 | 0.0222440 | 0.0000002 | 18/332 |
| R-HSA-8936459 | RUNX1 regulates genes involved in megakaryocyte differentiation and platelet function | 0.0000019 | 0.0222440 | 0.0000002 | 18/332 |
| R-HSA-5625740 | RHO GTPases activate PKNs | 0.0000132 | 0.0222440 | 0.0000005 | 17/332 |
| R-HSA-73772 | RNA Polymerase I Promoter Escape | 0.0000091 | 0.0222776 | 0.0000004 | 17/332 |
| R-HSA-5250924 | B-WICH complex positively regulates rRNA expression | 0.0000091 | 0.0222776 | 0.0000004 | 17/332 |
| R-HSA-201722 | Formation of the beta-catenin:TCF transactivating complex | 0.0000062 | 0.0223110 | 0.0000003 | 17/332 |
| R-HSA-912446 | Meiotic recombination | 0.0000140 | 0.0223942 | 0.0000005 | 16/332 |
| R-HSA-427389 | ERCC6 (CSB) and EHMT2 (G9a) positively regulate rRNA expression | 0.0000032 | 0.0225464 | 0.0000002 | 16/332 |
| R-HSA-2299718 | Condensation of Prophase Chromosomes | 0.0000016 | 0.0226604 | 0.0000002 | 16/332 |
| R-HSA-212300 | PRC2 methylates histones and DNA | 0.0000016 | 0.0226604 | 0.0000002 | 16/332 |
| R-HSA-427359 | SIRT1 negatively regulates rRNA expression | 0.0000005 | 0.0227512 | 0.0000001 | 16/332 |
| R-HSA-5625886 | Activated PKN1 stimulates transcription of AR (androgen receptor) regulated genes KLK2 and KLK3 | 0.0000002 | 0.0228607 | 0.0000000 | 16/332 |
| R-HSA-5334118 | DNA methylation | 0.0000001 | 0.0229205 | 0.0000000 | 16/332 |
| R-HSA-73728 | RNA Polymerase I Promoter Opening | 0.0000001 | 0.0229205 | 0.0000000 | 16/332 |
| R-HSA-5617472 | Activation of anterior HOX genes in hindbrain development during early embryogenesis | 0.0012447 | 0.0218768 | 0.0000277 | 16/332 |
| R-HSA-5619507 | Activation of HOX genes during differentiation | 0.0012447 | 0.0218768 | 0.0000277 | 16/332 |
| R-HSA-1266695 | Interleukin-7 signaling | 0.0000009 | 0.0235953 | 0.0000001 | 12/332 |
| R-HSA-3214842 | HDMs demethylate histones | 0.0014118 | 0.0230764 | 0.0000307 | 11/332 |
| R-HSA-212165 | Epigenetic regulation of gene expression | 0.0034402 | 0.0213101 | 0.0000713 | 18/332 |
| R-HSA-6783783 | Interleukin-10 signaling | 0.0024551 | 0.0235745 | 0.0000521 | 9/332 |
| R-HSA-157118 | Signaling by NOTCH | 0.0073706 | 0.0203308 | 0.0001493 | 23/332 |
| R-HSA-6785807 | Interleukin-4 and Interleukin-13 signaling | 0.0172716 | 0.0222403 | 0.0003347 | 13/332 |
| R-HSA-1474228 | Degradation of the extracellular matrix | 0.0254273 | 0.0216776 | 0.0004823 | 15/332 |
| R-HSA-3214841 | PKMTs methylate histone lysines | 0.0366716 | 0.0225905 | 0.0006810 | 11/332 |
| R-HSA-201681 | TCF dependent signaling in response to WNT | 0.0414055 | 0.0204004 | 0.0007533 | 21/332 |
| R-HSA-3247509 | Chromatin modifying enzymes | 0.0957696 | 0.0199961 | 0.0016417 | 23/332 |
| R-HSA-4839726 | Chromatin organization | 0.0957696 | 0.0199961 | 0.0016417 | 23/332 |
Genes involved in top 5 enriched terms: AGO4, AXIN1, B4GALT1, BRPF1, CCL2, CDKN1A, CLCF1, CTF1, CXCL1, CXCL2, DDX5, DHRS4, DHRS4L2, DTX2, FOS, FOSB, HAT1, HBEGF, HEYL, HIST1H2BC, HIST1H2BD, HIST1H2BE, HIST1H2BF, HIST1H2BG, HIST1H2BI, HIST1H3A, HIST1H3B, HIST1H3C, HIST1H3D, HIST1H3E, HIST1H3F, HIST1H3H, HIST1H3I, HIST1H3J, HSPB1, IL1RN, IL6, IL7, IL7R, IL8, JUNB, KAT6A, KDM1B, LAMA5, LIF, MAP3K7, MAPK7, MBIP, MCL1, MED1, MMP2, NOD2, NOTCH3, OSM, PELI1, PSMC1, PTGS2, PTPN13, RCOR1, RELA, RIPK2, SPHK1, TIMP1, TRAF6, YBX1
Genes involved in all enriched terms: ABL1, ADAM8, AGO4, AXIN1, B4GALT1, BMP1, BRPF1, CAPN12, CCL2, CDKN1A, CLCF1, COL11A1, COL4A1, COL4A2, COL8A2, CTF1, CTSK, CTSL, CXCL1, CXCL2, DDX5, DHRS4, DHRS4L2, DIDO1, DTX2, ELF1, FOS, FOSB, GSN, HAT1, HBEGF, HEYL, HIST1H1D, HIST1H2BC, HIST1H2BD, HIST1H2BE, HIST1H2BF, HIST1H2BG, HIST1H2BI, HIST1H3A, HIST1H3B, HIST1H3C, HIST1H3D, HIST1H3E, HIST1H3F, HIST1H3H, HIST1H3I, HIST1H3J, HSPB1, IL1RN, IL6, IL7, IL7R, IL8, JUNB, KAT6A, KDM1B, LAMA5, LAMC1, LEMD3, LIF, LRP5, MAP3K7, MAPK7, MBIP, MCL1, MED1, MMP14, MMP2, MYH9, NOD2, NOTCH3, NUMA1, NUP188, NUP98, OSM, PELI1, PHYKPL, PSMC1, PTGS2, PTPN13, RANBP2, RCOR1, RELA, RIPK2, RUNX3, SOX6, SPHK1, SYNE2, TAL1, TCF12, TDRD1, TET2, TGFBI, THBS1, TIMP1, TRAF6, YBX1, ZNRD1
Enrichment results for cluster 45:
- GSEA has 223 enriched term(s)
- ORA has 309 enriched term(s)
- 70 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| R-HSA-157118 | Signaling by NOTCH | 1.8e-06 | 0.0252188 | 0e+00 | 14/78 |
| R-HSA-3247509 | Chromatin modifying enzymes | 1.7e-06 | 0.0252212 | 0e+00 | 15/78 |
| R-HSA-4839726 | Chromatin organization | 1.7e-06 | 0.0252212 | 0e+00 | 15/78 |
| R-HSA-8878171 | Transcriptional regulation by RUNX1 | 1.0e-07 | 0.0252716 | 0e+00 | 15/78 |
| R-HSA-9006931 | Signaling by Nuclear Receptors | 5.8e-06 | 0.0252883 | 1e-07 | 14/78 |
| R-HSA-201681 | TCF dependent signaling in response to WNT | 0.0e+00 | 0.0253050 | 0e+00 | 16/78 |
| R-HSA-8939211 | ESR-mediated signaling | 6.0e-07 | 0.0253841 | 0e+00 | 14/78 |
| R-HSA-2559583 | Cellular Senescence | 0.0e+00 | 0.0255151 | 0e+00 | 17/78 |
| R-HSA-9018519 | Estrogen-dependent gene expression | 0.0e+00 | 0.0255433 | 0e+00 | 14/78 |
| R-HSA-212165 | Epigenetic regulation of gene expression | 0.0e+00 | 0.0255632 | 0e+00 | 14/78 |
| R-HSA-3214847 | HATs acetylate histones | 0.0e+00 | 0.0256190 | 0e+00 | 14/78 |
| R-HSA-73854 | RNA Polymerase I Promoter Clearance | 0.0e+00 | 0.0256302 | 0e+00 | 14/78 |
| R-HSA-211000 | Gene Silencing by RNA | 0.0e+00 | 0.0256376 | 0e+00 | 15/78 |
| R-HSA-1500620 | Meiosis | 0.0e+00 | 0.0256381 | 0e+00 | 16/78 |
| R-HSA-73864 | RNA Polymerase I Transcription | 0.0e+00 | 0.0256381 | 0e+00 | 14/78 |
| R-HSA-5617472 | Activation of anterior HOX genes in hindbrain development during early embryogenesis | 0.0e+00 | 0.0256381 | 0e+00 | 14/78 |
| R-HSA-5619507 | Activation of HOX genes during differentiation | 0.0e+00 | 0.0256381 | 0e+00 | 14/78 |
| R-HSA-68875 | Mitotic Prophase | 0.0e+00 | 0.0256430 | 0e+00 | 15/78 |
| R-HSA-8939236 | RUNX1 regulates transcription of genes involved in differentiation of HSCs | 0.0e+00 | 0.0256513 | 0e+00 | 15/78 |
| R-HSA-2559580 | Oxidative Stress Induced Senescence | 0.0e+00 | 0.0256562 | 0e+00 | 15/78 |
| R-HSA-1474165 | Reproduction | 0.0e+00 | 0.0256577 | 0e+00 | 17/78 |
| R-HSA-1912422 | Pre-NOTCH Expression and Processing | 0.0e+00 | 0.0256602 | 0e+00 | 14/78 |
| R-HSA-5250941 | Negative epigenetic regulation of rRNA expression | 0.0e+00 | 0.0256602 | 0e+00 | 14/78 |
| R-HSA-2559582 | Senescence-Associated Secretory Phenotype (SASP) | 0.0e+00 | 0.0256602 | 0e+00 | 14/78 |
| R-HSA-73886 | Chromosome Maintenance | 0.0e+00 | 0.0257005 | 0e+00 | 16/78 |
| R-HSA-427413 | NoRC negatively regulates rRNA expression | 0.0e+00 | 0.0257060 | 0e+00 | 14/78 |
| R-HSA-5250913 | Positive epigenetic regulation of rRNA expression | 0.0e+00 | 0.0257060 | 0e+00 | 14/78 |
| R-HSA-5578749 | Transcriptional regulation by small RNAs | 0.0e+00 | 0.0257060 | 0e+00 | 14/78 |
| R-HSA-5693607 | Processing of DNA double-strand break ends | 0.0e+00 | 0.0257119 | 0e+00 | 14/78 |
| R-HSA-3214815 | HDACs deacetylate histones | 0.0e+00 | 0.0257415 | 0e+00 | 14/78 |
| R-HSA-977225 | Amyloid fiber formation | 0.0e+00 | 0.0257638 | 0e+00 | 14/78 |
| R-HSA-69473 | G2/M DNA damage checkpoint | 0.0e+00 | 0.0257638 | 0e+00 | 14/78 |
| R-HSA-5625740 | RHO GTPases activate PKNs | 0.0e+00 | 0.0258239 | 0e+00 | 15/78 |
| R-HSA-1912408 | Pre-NOTCH Transcription and Translation | 0.0e+00 | 0.0258239 | 0e+00 | 14/78 |
| R-HSA-8936459 | RUNX1 regulates genes involved in megakaryocyte differentiation and platelet function | 0.0e+00 | 0.0258239 | 0e+00 | 14/78 |
| R-HSA-73884 | Base Excision Repair | 0.0e+00 | 0.0258962 | 0e+00 | 16/78 |
| R-HSA-73772 | RNA Polymerase I Promoter Escape | 0.0e+00 | 0.0258962 | 0e+00 | 14/78 |
| R-HSA-5250924 | B-WICH complex positively regulates rRNA expression | 0.0e+00 | 0.0258962 | 0e+00 | 14/78 |
| R-HSA-201722 | Formation of the beta-catenin:TCF transactivating complex | 0.0e+00 | 0.0259358 | 0e+00 | 14/78 |
| R-HSA-157579 | Telomere Maintenance | 0.0e+00 | 0.0259866 | 0e+00 | 16/78 |
| R-HSA-5693606 | DNA Double Strand Break Response | 0.0e+00 | 0.0259866 | 0e+00 | 14/78 |
| R-HSA-3214858 | RMTs methylate histone arginines | 0.0e+00 | 0.0259866 | 0e+00 | 14/78 |
| R-HSA-912446 | Meiotic recombination | 0.0e+00 | 0.0260038 | 0e+00 | 14/78 |
| R-HSA-5693565 | Recruitment and ATM-mediated phosphorylation of repair and signaling proteins at DNA double strand breaks | 0.0e+00 | 0.0260058 | 0e+00 | 14/78 |
| R-HSA-2299718 | Condensation of Prophase Chromosomes | 0.0e+00 | 0.0260245 | 0e+00 | 14/78 |
| R-HSA-212300 | PRC2 methylates histones and DNA | 0.0e+00 | 0.0260245 | 0e+00 | 14/78 |
| R-HSA-1221632 | Meiotic synapsis | 0.0e+00 | 0.0260336 | 0e+00 | 16/78 |
| R-HSA-3214841 | PKMTs methylate histone lysines | 0.0e+00 | 0.0260391 | 0e+00 | 14/78 |
| R-HSA-2559586 | DNA Damage/Telomere Stress Induced Senescence | 0.0e+00 | 0.0260437 | 0e+00 | 16/78 |
| R-HSA-4551638 | SUMOylation of chromatin organization proteins | 0.0e+00 | 0.0260508 | 0e+00 | 14/78 |
| R-HSA-427389 | ERCC6 (CSB) and EHMT2 (G9a) positively regulate rRNA expression | 0.0e+00 | 0.0260508 | 0e+00 | 14/78 |
| R-HSA-427359 | SIRT1 negatively regulates rRNA expression | 0.0e+00 | 0.0260985 | 0e+00 | 14/78 |
| R-HSA-5693571 | Nonhomologous End-Joining (NHEJ) | 0.0e+00 | 0.0261066 | 0e+00 | 14/78 |
| R-HSA-606279 | Deposition of new CENPA-containing nucleosomes at the centromere | 0.0e+00 | 0.0261336 | 0e+00 | 14/78 |
| R-HSA-774815 | Nucleosome assembly | 0.0e+00 | 0.0261336 | 0e+00 | 14/78 |
| R-HSA-73929 | Base-Excision Repair, AP Site Formation | 0.0e+00 | 0.0261387 | 0e+00 | 16/78 |
| R-HSA-5625886 | Activated PKN1 stimulates transcription of AR (androgen receptor) regulated genes KLK2 and KLK3 | 0.0e+00 | 0.0261591 | 0e+00 | 15/78 |
| R-HSA-5334118 | DNA methylation | 0.0e+00 | 0.0261663 | 0e+00 | 14/78 |
| R-HSA-73728 | RNA Polymerase I Promoter Opening | 0.0e+00 | 0.0261663 | 0e+00 | 14/78 |
| R-HSA-110328 | Recognition and association of DNA glycosylase with site containing an affected pyrimidine | 0.0e+00 | 0.0261928 | 0e+00 | 16/78 |
| R-HSA-110329 | Cleavage of the damaged pyrimidine | 0.0e+00 | 0.0261928 | 0e+00 | 16/78 |
| R-HSA-73928 | Depyrimidination | 0.0e+00 | 0.0261928 | 0e+00 | 16/78 |
| R-HSA-110330 | Recognition and association of DNA glycosylase with site containing an affected purine | 0.0e+00 | 0.0262010 | 0e+00 | 16/78 |
| R-HSA-110331 | Cleavage of the damaged purine | 0.0e+00 | 0.0262010 | 0e+00 | 16/78 |
| R-HSA-73927 | Depurination | 0.0e+00 | 0.0262010 | 0e+00 | 16/78 |
| R-HSA-3214842 | HDMs demethylate histones | 0.0e+00 | 0.0262323 | 0e+00 | 14/78 |
| R-HSA-171306 | Packaging Of Telomere Ends | 0.0e+00 | 0.0262493 | 0e+00 | 16/78 |
| R-HSA-3108232 | SUMO E3 ligases SUMOylate target proteins | 0.0e+00 | 0.0509826 | 0e+00 | 15/78 |
| R-HSA-69620 | Cell Cycle Checkpoints | 3.0e-05 | 0.0572515 | 3e-07 | 14/78 |
| R-HSA-5693567 | HDR through Homologous Recombination (HRR) or Single Strand Annealing (SSA) | 0.0e+00 | 0.0768981 | 0e+00 | 14/78 |
Genes involved in top 5 enriched terms: HIST1H4A, HIST1H4B, HIST1H4C, HIST1H4D, HIST1H4E, HIST1H4F, HIST1H4H, HIST1H4I, HIST1H4J, HIST1H4K, HIST1H4L, HIST4H4, PADI4, YAP1
Genes involved in all enriched terms: ACD, AR, FZD8, HIST1H4A, HIST1H4B, HIST1H4C, HIST1H4D, HIST1H4E, HIST1H4F, HIST1H4H, HIST1H4I, HIST1H4J, HIST1H4K, HIST1H4L, HIST4H4, HVCN1, MAP3K5, NEK9, PADI4, PLD6, TINF2, WNT3, YAP1
Plots of the results when there are more than 5 terms in common between methods:
# Reactome result plots for the modules in top_modules_mtcor.
# NOTE(review): plot_results and plot_shared_genes are defined outside this section; going by the
# accompanying text they presumably only draw modules with more than 5 shared terms -- confirm.
plot_results(top_modules_enrichment, top_modules_mtcor, 'Reactome')
plot_shared_genes(top_modules_enrichment, top_modules_mtcor, 'Reactome')
# Switch to a second set of modules: elements 4 to 6 of top_modules.
# NOTE(review): the name suggests these modules were picked for their SFARI gene content, but
# top_modules is built outside this section -- confirm what positions 4:6 hold.
top_modules_SFARI = top_modules[4:6]
# GSEA vs ORA comparison on the GO results for these modules (per-module term counts plus a
# table of the terms reported by both methods).
compare_methods(GSEA_enrichment, ORA_enrichment, top_modules_enrichment, top_modules_SFARI, 'GO')
Enrichment results for cluster 7:
- GSEA has 137 enriched term(s)
- ORA has 104 enriched term(s)
- 21 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| GO:0034762 | regulation of transmembrane transport | 0.0000000 | 0.0697816 | 0.0000000 | 92/1284 |
| GO:0015672 | monovalent inorganic cation transport | 0.0000001 | 0.0711775 | 0.0000000 | 82/1284 |
| GO:0034765 | regulation of ion transmembrane transport | 0.0000000 | 0.0711836 | 0.0000000 | 85/1284 |
| GO:0042391 | regulation of membrane potential | 0.0006281 | 0.0732093 | 0.0000399 | 62/1284 |
| GO:0050804 | modulation of chemical synaptic transmission | 0.0021823 | 0.0721148 | 0.0001200 | 65/1284 |
| GO:0099177 | regulation of trans-synaptic signaling | 0.0024087 | 0.0720863 | 0.0001251 | 65/1284 |
| GO:1904062 | regulation of cation transmembrane transport | 0.0005109 | 0.0752138 | 0.0000367 | 55/1284 |
| GO:0050890 | cognition | 0.0001711 | 0.0761199 | 0.0000160 | 53/1284 |
| GO:0007611 | learning or memory | 0.0000990 | 0.0776834 | 0.0000116 | 49/1284 |
| GO:0032412 | regulation of ion transmembrane transporter activity | 0.0006399 | 0.0783061 | 0.0000399 | 45/1284 |
| GO:0022898 | regulation of transmembrane transporter activity | 0.0016952 | 0.0780232 | 0.0000990 | 45/1284 |
| GO:0032409 | regulation of transporter activity | 0.0028050 | 0.0775511 | 0.0001380 | 46/1284 |
| GO:0006813 | potassium ion transport | 0.0000003 | 0.0808708 | 0.0000001 | 45/1284 |
| GO:2001257 | regulation of cation channel activity | 0.0003698 | 0.0822741 | 0.0000288 | 36/1284 |
| GO:0071804 | cellular potassium ion transport | 0.0000117 | 0.0847120 | 0.0000016 | 34/1284 |
| GO:0071805 | potassium ion transmembrane transport | 0.0000117 | 0.0847120 | 0.0000016 | 34/1284 |
| GO:0023061 | signal release | 0.0169944 | 0.0719902 | 0.0006619 | 63/1284 |
| GO:0050808 | synapse organization | 0.0169944 | 0.0719902 | 0.0006619 | 63/1284 |
| GO:0035637 | multicellular organismal signaling | 0.0222799 | 0.0804569 | 0.0008010 | 36/1284 |
| GO:0048667 | cell morphogenesis involved in neuron differentiation | 0.0366082 | 0.0698008 | 0.0011799 | 73/1284 |
| GO:0007612 | learning | 0.0938417 | 0.0834048 | 0.0024365 | 29/1284 |
Genes involved in top 5 enriched terms: ABCC8, ACTN2, ADCYAP1, ANK2, ASIC2, ASIC3, ATP1A1, ATP1B1, ATP2A1, ATP5G1, ATP6V1A, ATP6V1B2, ATP6V1C1, ATP6V1D, ATP6V1E1, ATP6V1G2, ATP6V1H, B4GALT6, BCL11B, BHLHB9, BLOC1S6, BMP8A, BRAF, C2CD5, CACNA2D3, CACNB4, CACNG5, CADPS, CALM1, CALM2, CALM3, CAPN3, CARTPT, CASQ1, CCK, CD38, CDKL5, CHL1, CHN1, CLCN2, CLOCK, CNIH3, CNTN4, COL25A1, COX4I1, CRH, CRHBP, CTNNA2, CYB5R4, DCC, DCLK1, DGKI, DHX36, DLG3, DNM1L, DNM3, DPP10, DYSF, EFCAB4B, EFNA5, EPHA4, EPHA5, EPHB3, EXOC3L1, FAM115A, FBXO45, FFAR4, FGF12, FGF13, FGF14, FGF8, FGFR2, FLOT1, FLRT3, FZD3, GAD1, GAD2, GAL, GFRA2, GOPC, GPD1L, GPLD1, GPRASP2, GRIA2, GRIA3, GRIN2A, GRIN3A, GSTM2, HCN1, HPRT1, HTR2A, ICA1, ISCU, KALRN, KCNA1, KCNA2, KCNA6, KCNAB1, KCNAB3, KCNB1, KCNB2, KCNC2, KCNE2, KCNF1, KCNH1, KCNH5, KCNH8, KCNIP1, KCNIP2, KCNIP4, KCNJ1, KCNJ3, KCNJ4, KCNJ5, KCNJ6, KCNJ9, KCNMA1, KCNQ3, KCNQ5, KCNRG, KCNS1, KCNS3, KCNV1, KEL, LIN7A, LRRC38, LRRC4C, LRRC7, LRRK2, MAGED2, MAP2, MAP6, MCF2, MEF2C, MYO5A, MYO7A, MYRIP, NALCN, NDEL1, NEFH, NGEF, NMU, NOS2, NRXN3, NSF, NTN4, OLFM1, OPA1, OPRK1, OSTN, OXCT1, PAFAH1B1, PAK1, PAK3, PARK2, PFN2, PIK3CA, PIK3CB, PIK3CD, PPARGC1A, PPP1R9A, PPP3CA, PPP3CB, PREPL, PRKCB, PRKCE, PRKCI, PTK2B, PTPN3, PTPRD, RASGRF1, RBFOX2, RGS4, RHOT1, ROBO2, RPS6KA5, RUFY3, RYR2, SCG5, SCN1A, SCN1B, SCN2A, SCN5A, SCN8A, SCNN1D, SEMA3B, SEMA6A, SESTD1, SIRT4, SIRT6, SLC15A5, SLC17A7, SLC17A8, SLC24A2, SLC25A4, SLC38A1, SLC4A10, SLC5A11, SLC5A8, SLC8A1, SLC8A3, SLC9A5, SLC9B2, SLIT2, SNAP25, SPHK2, SREBF1, STMN1, STXBP5L, SYNJ1, SYP, SYT1, SYT12, SYT4, SZT2, TAC1, TACR1, TBR1, TMEM27, TOP2B, TRPC1, TRPC5, UCHL1, UNC5C, USP33, VIP, VSNL1, YWHAH, ZNF365
Genes involved in all enriched terms: ABCA7, ABCC8, ACTN2, ADCYAP1, ADD2, ANK2, ASIC2, ASIC3, ATAD1, ATP1A1, ATP1B1, ATP2A1, ATP2B1, ATP5G1, ATP6V1A, ATP6V1B2, ATP6V1C1, ATP6V1D, ATP6V1E1, ATP6V1G2, ATP6V1H, B4GALT6, BCL11B, BHLHB9, BID, BLOC1S6, BMP8A, BRAF, C2CD5, CA7, CACNA2D3, CACNB4, CACNG5, CADPS, CALB1, CALM1, CALM2, CALM3, CAMK4, CAPN3, CARTPT, CASQ1, CCK, CD38, CDKL5, CELF4, CHL1, CHN1, CHRM1, CHRM5, CHRNA1, CLCN2, CLOCK, CNIH3, CNTN4, CNTNAP2, COL25A1, COL4A5, CORIN, COX4I1, CPEB3, CRH, CRHBP, CTNNA2, CYB5R4, DCC, DCLK1, DDHD2, DGKB, DGKE, DGKI, DHX36, DLG3, DNM1L, DNM3, DPP10, DRP2, DYSF, EFCAB4B, EFNA5, EPHA4, EPHA5, EPHB3, ERBB4, EXOC3L1, FAM115A, FBXO45, FFAR4, FGF12, FGF13, FGF14, FGF8, FGFR2, FLOT1, FLRT3, FPGT-TNNI3K, FRMPD4, FZD3, FZD5, GABRA1, GABRB2, GABRD, GABRG2, GAD1, GAD2, GAL, GFRA2, GJD2, GLRB, GOPC, GOT1, GPD1L, GPHN, GPLD1, GPRASP2, GRIA2, GRIA3, GRIK1, GRIN2A, GRIN3A, GSTM2, HCN1, HPRT1, HTR2A, ICA1, INA, ISCU, JAKMIP1, KALRN, KAT2A, KCNA1, KCNA2, KCNA6, KCNAB1, KCNAB3, KCNB1, KCNB2, KCNC2, KCNE2, KCNF1, KCNH1, KCNH5, KCNH8, KCNIP1, KCNIP2, KCNIP4, KCNJ1, KCNJ3, KCNJ4, KCNJ5, KCNJ6, KCNJ9, KCNMA1, KCNQ3, KCNQ5, KCNRG, KCNS1, KCNS3, KCNV1, KEL, KIT, KRAS, LIN7A, LRFN5, LRRC38, LRRC4C, LRRC7, LRRK2, LRRN3, LRRTM2, LRRTM3, MAGED2, MAP2, MAP6, MCF2, MEF2C, MEIS2, MLLT11, MME, MYO5A, MYO7A, MYRIP, NALCN, NDEL1, NEFH, NGEF, NLGN4Y, NMU, NOS2, NPTN, NPY2R, NRG1, NRGN, NRXN3, NSF, NSG1, NTN4, OLFM1, OPA1, OPRK1, OSTN, OXCT1, P2RX6, PAFAH1B1, PAK1, PAK3, PAK7, PARK2, PCDH8, PCDHB10, PCDHB11, PCDHB14, PFN2, PIAS1, PICK1, PIK3CA, PIK3CB, PIK3CD, PJA2, PLCB1, PLK2, PPARGC1A, PPP1R9A, PPP3CA, PPP3CB, PREPL, PRKAR2B, PRKCB, PRKCE, PRKCI, PTK2B, PTPN3, PTPRD, RAB39B, RAB3B, RASGRF1, RBFOX2, RGS4, RGS7BP, RHOT1, ROBO2, RPS6KA5, RUFY3, RYR2, SCG5, SCN1A, SCN1B, SCN2A, SCN5A, SCN8A, SCNN1D, SEMA3B, SEMA6A, SESTD1, SGK2, SIRT4, SIRT6, SIX4, SLC15A5, SLC17A7, SLC17A8, SLC24A2, SLC25A36, SLC25A4, SLC38A1, SLC4A10, SLC5A11, SLC5A8, SLC8A1, SLC8A3, SLC9A5, 
SLC9B2, SLIT2, SLITRK4, SNAP25, SNCB, SPHK2, SREBF1, STMN1, STXBP5L, SYNJ1, SYP, SYT1, SYT12, SYT4, SZT2, TAC1, TACR1, TBR1, TMEM27, TNNI3K, TOP2B, TOR1A, TRPC1, TRPC5, TUSC3, UBE2V1, UBE2V2, UCHL1, UNC5C, USP33, VIP, VSNL1, WASF1, YWHAG, YWHAH, YWHAZ, ZC4H2, ZNF365
Enrichment results for cluster 22:
- GSEA has 279 enriched term(s)
- ORA has 5 enriched term(s)
- 0 terms are enriched in both methods
Enrichment results for cluster 39:
- GSEA has 173 enriched term(s)
- ORA has 8 enriched term(s)
- 3 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| GO:0032386 | regulation of intracellular transport | 0.0382956 | 0.0757639 | 0.0270058 | 23/302 |
| GO:0051648 | vesicle localization | 0.0565921 | 0.0801804 | 0.0270058 | 18/302 |
| GO:0051650 | establishment of vesicle localization | 0.0897019 | 0.0810056 | 0.0285373 | 17/302 |
Genes involved in all enriched terms: ABCA2, ATP2A2, CSNK2A2, DTNBP1, DYNC1H1, EMD, FAM65A, GRIK5, GSK3B, HTT, HUWE1, IPO5, KIF1A, KIF1B, MIEF1, NLGN2, NPEPPS, PRKACA, PRKCG, PTPN14, PTPN23, RAB11B, RAB3A, SEC16A, STX1B, SYN1, TCF7L2, TMED9, TRAPPC1, TRAPPC5, VAMP2
Plots of the results when there are more than 5 terms in common between methods:
# GO result plots for the modules in top_modules_SFARI.
# NOTE(review): plot_results and plot_shared_genes are defined outside this section; going by the
# accompanying text they presumably only draw modules with more than 5 shared terms -- confirm.
plot_results(top_modules_enrichment, top_modules_SFARI, 'GO')
plot_shared_genes(top_modules_enrichment, top_modules_SFARI, 'GO')
# GSEA vs ORA comparison on the DO results for the same modules (per-module term counts plus a
# table of the terms reported by both methods).
compare_methods(GSEA_enrichment, ORA_enrichment, top_modules_enrichment, top_modules_SFARI, 'DO')
Enrichment results for cluster 7:
- GSEA has 89 enriched term(s)
- ORA has 645 enriched term(s)
- 1 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| DOID:1826 | epilepsy syndrome | 0.0040891 | 0.0106698 | 0.0040891 | 38/607 |
Genes involved in all enriched terms: ADAM22, CACNB4, CDKL5, CLCN2, CNTNAP2, CRH, DCX, FAM3C, FOXG1, GABBR2, GABRA1, GABRB2, GABRG2, GAD1, GAD2, GJD2, GPHN, GRIA2, GRIA3, GRIK1, GRIN2A, HCN1, HSPBAP1, KCNA1, KCNMA1, KCNQ3, LGI2, MEF2C, PNOC, PVALB, SCN1A, SCN1B, SCN2A, SCN5A, SCN8A, SERPINI1, SLIT2, SYT1
Enrichment results for cluster 22:
- GSEA has 108 enriched term(s)
- ORA has 146 enriched term(s)
- 0 terms are enriched in both methods
Enrichment results for cluster 39:
- GSEA has 10 enriched term(s)
- ORA has 467 enriched term(s)
- 0 terms are enriched in both methods
# GSEA vs ORA comparison on the DGN results for the modules in top_modules_SFARI: for each
# module, print the number of terms each method reports, the number reported by both, and a
# table of the shared terms when there is at least one.
compare_methods(GSEA_enrichment, ORA_enrichment, top_modules_enrichment, top_modules_SFARI, 'DGN')
Enrichment results for cluster 7:
- GSEA has 25 enriched term(s)
- ORA has 2893 enriched term(s)
- 2 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| umls:C0035372 | Rett Syndrome | 0.0560114 | 0.0502623 | 0.0153481 | 26/1234 |
| umls:C0004936 | Mental disorders | 0.0802751 | 0.0422549 | 0.0153481 | 63/1234 |
Genes involved in all enriched terms: ACE, ACTN2, ADCYAP1, ADRBK2, ANKK1, ASMT, ATP6V1B2, CACNG5, CALB1, CARTPT, CCK, CD22, CDKL5, CDR2, CHRM4, CLOCK, CNTNAP2, CORT, CRH, CRHBP, CTXN3, DLX6, EFNA5, ENO2, ERBB4, FAM69A, FGFR2, FOXG1, GABRB2, GABRD, GABRG3, GAD1, GRIA2, GRIN2A, HIST1H4A, HIST1H4B, HIST1H4C, HIST1H4D, HIST1H4E, HIST1H4F, HIST1H4H, HIST1H4I, HIST1H4J, HIST1H4K, HIST1H4L, HIST4H4, HTR2A, HTR5A, HTR7, LMO3, LRRK2, MAGI1, MAP2, MCHR2, NALCN, NOS2, NRG1, NRSN1, NRXN3, NSF, OPRK1, PPP2R2B, PPP3CC, PTPRU, RAPGEF5, RGS4, RNMT, SCN8A, SEMA6A, SGSM3, SLC25A4, SNAP25, ST8SIA2, STMN1, SULT4A1, SYT4, TAC1, TACR1, TSNAX, VIP, WASF1, WDR45, YWHAH
Enrichment results for cluster 22:
- GSEA has 140 enriched term(s)
- ORA has 711 enriched term(s)
- 0 terms are enriched in both methods
Enrichment results for cluster 39:
- GSEA has 24 enriched term(s)
- ORA has 1921 enriched term(s)
- 1 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| umls:C3714756 | Intellectual Disability | 7.93e-05 | 0.0438518 | 7.74e-05 | 29/274 |
Genes involved in all enriched terms: ARID1A, CHAMP1, CIC, CUL4B, DYNC1H1, FASN, FMN2, GATAD2B, GNAS, HCFC1, HERC2, HUWE1, IQSEC2, KIAA1279, KIF1A, KMT2D, MECP2, MED12, PACS1, PCDH19, PRKCG, SHANK2, SMARCA4, SOBP, SPTAN1, SYNGAP1, TCF7L2, TSC1, UBR4
Plots of the results when there are more than 5 terms in common between methods:
# DGN result plots for the modules in top_modules_SFARI.
# NOTE(review): plot_results and plot_shared_genes are defined outside this section; going by the
# accompanying text they presumably only draw modules with more than 5 shared terms -- confirm.
plot_results(top_modules_enrichment, top_modules_SFARI, 'DGN')
plot_shared_genes(top_modules_enrichment, top_modules_SFARI, 'DGN')
# GSEA vs ORA comparison on the KEGG results for the same modules (per-module term counts plus
# a table of the terms reported by both methods).
compare_methods(GSEA_enrichment, ORA_enrichment, top_modules_enrichment, top_modules_SFARI, 'KEGG')
Enrichment results for cluster 7:
- GSEA has 48 enriched term(s)
- ORA has 307 enriched term(s)
- 11 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| hsa04020 | Calcium signaling pathway | 0.0000932 | 0.0044042 | 0.0000361 | 43/563 |
| hsa04728 | Dopaminergic synapse | 0.0045852 | 0.0047724 | 0.0007414 | 26/563 |
| hsa04921 | Oxytocin signaling pathway | 0.0057410 | 0.0046608 | 0.0007414 | 29/563 |
| hsa05033 | Nicotine addiction | 0.0056097 | 0.0054811 | 0.0007414 | 12/563 |
| hsa04720 | Long-term potentiation | 0.0252295 | 0.0051540 | 0.0024438 | 16/563 |
| hsa04724 | Glutamatergic synapse | 0.0287679 | 0.0048480 | 0.0024769 | 22/563 |
| hsa04727 | GABAergic synapse | 0.0457932 | 0.0050211 | 0.0029571 | 18/563 |
| hsa04360 | Axon guidance | 0.0021857 | 0.0590901 | 0.0005646 | 34/563 |
| hsa04929 | GnRH secretion | 0.0346940 | 0.0414781 | 0.0026884 | 15/563 |
| hsa04024 | cAMP signaling pathway | 0.0408887 | 0.0359115 | 0.0028804 | 33/563 |
| hsa04713 | Circadian entrainment | 0.0820462 | 0.0049542 | 0.0048906 | 19/563 |
Genes involved in top 5 enriched terms: ADCY10, ADCYAP1, ADRA1D, ADRBK2, ATP2A1, ATP2B1, CACNA2D3, CACNB4, CACNG5, CALM1, CALM2, CALM3, CAMK1G, CAMK4, CAMKK2, CASQ1, CD38, CHRM1, CHRM3, CHRM5, CLOCK, DLGAP1, ERBB4, FGF18, FGF5, FGF8, FGF9, FGFR2, GLS, GNG3, GRIA2, GRIA3, GRIK1, GRIN2A, GRIN3A, GUCY1B3, HTR2A, HTR5A, HTR7, KCNJ3, KCNJ4, KCNJ5, KCNJ6, KCNJ9, KRAS, MAPK10, MAPK8, MAPK9, MCOLN1, MCOLN2, MEF2C, MYLK3, NOS2, P2RX6, PHKG2, PLCB1, PLD2, PPP2CA, PPP2R2B, PPP2R2D, PPP2R5E, PPP3CA, PPP3CB, PPP3CC, PPP3R1, PRKAA2, PRKACB, PRKCB, PTK2B, ROCK2, RPS6KA5, RYR2, SCN1A, SLC17A7, SLC17A8, SLC25A4, SLC38A1, SLC8A1, SLC8A3, SPHK2, TACR1, TRPC1
Genes involved in all enriched terms: ADCY10, ADCYAP1, ADRA1D, ADRBK2, ATP1A1, ATP1B1, ATP2A1, ATP2B1, BRAF, CACNA2D3, CACNB4, CACNG5, CALM1, CALM2, CALM3, CAMK1G, CAMK4, CAMKK2, CASQ1, CD38, CHRM1, CHRM3, CHRM5, CLOCK, CNGB3, CRH, DCC, DLGAP1, EFNA2, EFNA5, EPHA4, EPHA5, EPHB3, ERBB4, FGF18, FGF5, FGF8, FGF9, FGFR2, FZD3, GABARAPL1, GABBR2, GABRA1, GABRA4, GABRB2, GABRD, GABRG2, GABRG3, GAD1, GAD2, GLS, GNG3, GPHN, GRIA2, GRIA3, GRIK1, GRIN2A, GRIN3A, GUCY1B3, HCN1, HTR2A, HTR5A, HTR7, KCNJ3, KCNJ4, KCNJ5, KCNJ6, KCNJ9, KRAS, LRRC4C, MAPK10, MAPK8, MAPK9, MCOLN1, MCOLN2, MEF2C, MYLK3, NGEF, NOS2, NSF, NTN4, P2RX6, PAK1, PAK3, PAK7, PHKG2, PIK3CA, PIK3CB, PIK3CD, PLCB1, PLD2, PLXNB3, PPP2CA, PPP2R2B, PPP2R2D, PPP2R5E, PPP3CA, PPP3CB, PPP3CC, PPP3R1, PRKAA2, PRKACB, PRKCB, PTK2B, RASA1, RND1, ROBO2, ROCK2, RPS6KA5, RPS6KA6, RYR2, SCN1A, SEMA3B, SEMA6A, SLC17A7, SLC17A8, SLC25A4, SLC38A1, SLC8A1, SLC8A3, SLIT2, SPHK2, SST, TACR1, TRPC1, TRPC3, TRPC4, TRPC5, TSHR, UNC5C, VIP
Enrichment results for cluster 22:
- GSEA has 50 enriched term(s)
- ORA has 60 enriched term(s)
- 0 terms are enriched in both methods
Enrichment results for cluster 39:
- GSEA has 49 enriched term(s)
- ORA has 224 enriched term(s)
- 1 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| hsa04962 | Vasopressin-regulated water reabsorption | 0.0535881 | 0.0057079 | 0.0496092 | 6/148 |
Genes involved in all enriched terms: ARHGDIA, DYNC1H1, GNAS, PRKACA, RAB11B, VAMP2
Plots of the results when there are more than 5 terms in common between methods:
# KEGG result plots for the modules in top_modules_SFARI.
# NOTE(review): plot_results and plot_shared_genes are defined outside this section; going by the
# accompanying text they presumably only draw modules with more than 5 shared terms -- confirm.
plot_results(top_modules_enrichment, top_modules_SFARI, 'KEGG')
plot_shared_genes(top_modules_enrichment, top_modules_SFARI, 'KEGG')
# GSEA vs ORA comparison on the Reactome results for the same modules (per-module term counts
# plus a table of the terms reported by both methods).
compare_methods(GSEA_enrichment, ORA_enrichment, top_modules_enrichment, top_modules_SFARI, 'Reactome')
Enrichment results for cluster 7:
- GSEA has 70 enriched term(s)
- ORA has 1118 enriched term(s)
- 15 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| R-HSA-112316 | Neuronal System | 0.0000000 | 0.0165790 | 0.0000000 | 90/832 |
| R-HSA-112315 | Transmission across Chemical Synapses | 0.0000011 | 0.0176302 | 0.0000003 | 54/832 |
| R-HSA-112314 | Neurotransmitter receptors and postsynaptic signal transmission | 0.0000009 | 0.0183710 | 0.0000003 | 45/832 |
| R-HSA-1296071 | Potassium Channels | 0.0000092 | 0.0201343 | 0.0000020 | 28/832 |
| R-HSA-442755 | Activation of NMDA receptors and postsynaptic events | 0.0015988 | 0.0203691 | 0.0001539 | 23/832 |
| R-HSA-1296072 | Voltage gated Potassium channels | 0.0000315 | 0.0220304 | 0.0000054 | 17/832 |
| R-HSA-9620244 | Long-term potentiation | 0.0037844 | 0.0230669 | 0.0002899 | 11/832 |
| R-HSA-438064 | Post NMDA receptor activation events | 0.0076408 | 0.0207495 | 0.0003893 | 20/832 |
| R-HSA-5576891 | Cardiac conduction | 0.0095060 | 0.0192402 | 0.0004334 | 29/832 |
| R-HSA-983712 | Ion channel transport | 0.0002076 | 0.0372522 | 0.0000257 | 38/832 |
| R-HSA-397014 | Muscle contraction | 0.0248832 | 0.0183008 | 0.0010264 | 36/832 |
| R-HSA-5576892 | Phase 0 - rapid depolarisation | 0.0389283 | 0.0217603 | 0.0014050 | 14/832 |
| R-HSA-180024 | DARPP-32 events | 0.0457512 | 0.0230007 | 0.0015852 | 10/832 |
| R-HSA-442982 | Ras activation upon Ca2+ influx through NMDA receptor | 0.0585270 | 0.0232663 | 0.0019499 | 9/832 |
| R-HSA-438066 | Unblocking of NMDA receptors, glutamate binding and activation | 0.0909240 | 0.0232296 | 0.0027196 | 9/832 |
Genes involved in top 5 enriched terms: ABCC8, ACTC1, ACTN2, ATP1A1, ATP1B1, ATP2A1, ATP2B1, CACNA2D3, CACNB4, CACNG5, CALM1, CALM2, CALM3, CAMK4, CAMKK2, CASQ1, CHRNA1, CORIN, DLG3, DLGAP1, DYSF, EPB41L3, ERBB4, FGF12, FGF13, FGF14, FLOT1, FLOT2, GABBR2, GABRA1, GABRA4, GABRB2, GABRG2, GABRG3, GAD1, GAD2, GJD2, GLRB, GLS, GNG3, GRIA2, GRIA3, GRIK1, GRIN2A, GRIN3A, GRIP1, GUCY1B3, HCN1, KCNA1, KCNA2, KCNA6, KCNAB1, KCNAB3, KCNB1, KCNB2, KCNC2, KCNE2, KCNF1, KCNH1, KCNH5, KCNH8, KCNIP1, KCNIP2, KCNIP4, KCNJ1, KCNJ3, KCNJ4, KCNJ5, KCNJ6, KCNJ9, KCNMA1, KCNQ3, KCNQ5, KCNS1, KCNS3, KCNV1, KRAS, LIN7A, LRRC7, LRRTM2, LRRTM3, MYL3, NCALD, NLGN4Y, NPTN, NRG1, NRGN, NRXN3, NSF, PAK1, PICK1, PLCB1, PRKAA2, PRKACB, PRKAR2B, PRKCB, PTPRD, RASGRF1, RASGRF2, RPS6KA6, RTN3, RYR2, SCN1A, SCN1B, SCN2A, SCN5A, SCN8A, SLC17A7, SLC38A1, SLC8A1, SLC8A3, SLITRK4, SNAP25, SYT1, SYT10, SYT12, SYT2, TNNT3, TRPC1
Genes involved in all enriched terms: ABCC8, ACTC1, ACTN2, ANO3, ANO7, ANO8, ASIC2, ASIC3, ATP1A1, ATP1B1, ATP2A1, ATP2B1, ATP6V1A, ATP6V1B2, ATP6V1C1, ATP6V1D, ATP6V1E1, ATP6V1G2, ATP6V1H, BEST1, BEST4, C8orf44-SGK3, CACNA2D3, CACNB4, CACNG5, CALM1, CALM2, CALM3, CAMK4, CAMKK2, CASQ1, CHRNA1, CLCN2, CORIN, DLG3, DLGAP1, DYSF, EPB41L3, ERBB4, FGF12, FGF13, FGF14, FLOT1, FLOT2, GABBR2, GABRA1, GABRA4, GABRB2, GABRG2, GABRG3, GAD1, GAD2, GJD2, GLRB, GLS, GNG3, GRIA2, GRIA3, GRIK1, GRIN2A, GRIN3A, GRIP1, GUCY1B3, HCN1, KCNA1, KCNA2, KCNA6, KCNAB1, KCNAB3, KCNB1, KCNB2, KCNC2, KCNE2, KCNF1, KCNH1, KCNH5, KCNH8, KCNIP1, KCNIP2, KCNIP4, KCNJ1, KCNJ3, KCNJ4, KCNJ5, KCNJ6, KCNJ9, KCNMA1, KCNQ3, KCNQ5, KCNS1, KCNS3, KCNV1, KRAS, LIN7A, LRRC7, LRRTM2, LRRTM3, MCOLN1, MCOLN2, MYL3, NALCN, NCALD, NLGN4Y, NPTN, NRG1, NRGN, NRXN3, NSF, PAK1, PICK1, PLCB1, PPP2CA, PPP3CA, PPP3CB, PPP3CC, PPP3R1, PRKAA2, PRKACB, PRKAR2B, PRKCB, PTPRD, RASGRF1, RASGRF2, RPS6KA6, RTN3, RYR2, SCN1A, SCN1B, SCN2A, SCN5A, SCN8A, SCNN1D, SGK2, SGK3, SLC17A7, SLC38A1, SLC8A1, SLC8A3, SLC9B2, SLITRK4, SNAP25, SYT1, SYT10, SYT12, SYT2, TNNT3, TRPC1, TRPC3, TRPC4, TRPC5, TRPV6, UNC80
Enrichment results for cluster 22:
- GSEA has 103 enriched term(s)
- ORA has 312 enriched term(s)
- 0 terms are enriched in both methods
Enrichment results for cluster 39:
- GSEA has 70 enriched term(s)
- ORA has 742 enriched term(s)
- 2 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| R-HSA-9022692 | Regulation of MECP2 expression and activity | 0.0002994 | 0.046890 | 0.0002875 | 8/203 |
| R-HSA-8986944 | Transcriptional Regulation by MECP2 | 0.0476727 | 0.022074 | 0.0228929 | 8/203 |
Genes involved in all enriched terms: AGO1, AGO2, HDAC2, HTT, MECP2, NCOR2, PRKACA, SIN3A
Plots of the results when there are more than 5 terms in common between methods:
# Draw the Reactome plots for the modules in top_modules_SFARI
# (plot_results and plot_shared_genes are helpers defined outside this section)
plot_results(top_modules_enrichment, top_modules_SFARI, 'Reactome')
plot_shared_genes(top_modules_enrichment, top_modules_SFARI, 'Reactome')
# Get the module (cluster) names for the clusters with numbers 20 and 22.
# All rows of a cluster share the same Module value, so the first row is enough.
# `n` has to be passed by name to slice_head(): a positional value is captured by `...` instead of `n`
selected_modules = c(genes_info %>% filter(module_number==20) %>% slice_head(n = 1) %>% pull(Module) %>% as.character,
                     genes_info %>% filter(module_number==22) %>% slice_head(n = 1) %>% pull(Module) %>% as.character)
# Enrichment analysis with a relaxed p-value cutoff for the modules in `selected_modules`.
# If the combined (GSEA + ORA) results file already exists, the three cached files are loaded;
# otherwise GSEA and ORA are (re)computed, cached to disk, and intersected per module and database.
# Objects left in the workspace: GSEA_enrichment, ORA_enrichment and selected_modules_enrichment.
if(file.exists('./../Data/preprocessedData/top_modules_enrichment_relaxed.RData')){
load('./../Data/preprocessedData/top_modules_enrichment_relaxed.RData')
load('./../Data/preprocessedData/GSEA_results_relaxed.RData')
load('./../Data/preprocessedData/ORA_results_relaxed.RData')
} else{
# Relaxed cutoff, passed as `pvalueCutoff` to every gse*/enrich* call below
pvalueCutoff = 0.5
################################################################################################################
# Prepare dataset for Enrichment Analysis
# Genes in the 'gray' module are excluded
EA_dataset = genes_info %>% dplyr::rename('ensembl_gene_id' = ID) %>% filter(Module!='gray')
# clusterProfiler works with Entrez Gene IDs, so we have to assign one to each gene
getinfo = c('ensembl_gene_id','entrezgene')
# The Ensembl -> Entrez mapping is queried from the February 2014 Ensembl archive
mart=useMart(biomart='ENSEMBL_MART_ENSEMBL',dataset='hsapiens_gene_ensembl',host='feb2014.archive.ensembl.org')
biomart_output = getBM(attributes=getinfo, filters=c('ensembl_gene_id'),
values=EA_dataset$ensembl_gene_id, mart=mart)
# Keep a single row per Entrez ID (the first one found)
# NOTE(review): genes without an Entrez ID would all collapse into one NA row here -- confirm this is intended
EA_dataset = biomart_output %>% left_join(EA_dataset, by='ensembl_gene_id') %>%
dplyr::rename('ID'=ensembl_gene_id) %>% distinct(entrezgene, .keep_all = TRUE)
rm(getinfo, mart, biomart_output)
################################################################################################################
# GSEA enrichment
# Results are cached in file_name; an existing cache is loaded instead of recomputing
file_name = './../Data/preprocessedData/GSEA_results_relaxed.RData'
if(file.exists(file_name)){
load(file_name)
} else {
cat('\n\nPerforming GSEA\n')
nPerm = 1e5
# Only the gene identifiers and the 'MM.' columns are needed for the ranking
GSEA_dataset = EA_dataset %>% dplyr::select(ID, entrezgene, contains('MM.'))
GSEA_enrichment = list()
for(module in selected_modules){
cat(paste0('\nModule: ', which(selected_modules == module), '/', length(selected_modules)))
# Ranking variable: the column named 'MM.' + the module name without its first character
# (presumably the leading '#' of a hex colour -- confirm against how the MM. columns were named)
geneList = GSEA_dataset %>% pull(paste0('MM.',substring(module,2)))
names(geneList) = GSEA_dataset %>% pull(entrezgene) %>% as.character
# The gse* functions receive the gene list ranked in decreasing order
geneList = sort(geneList, decreasing = TRUE)
GSEA_GO = gseGO(geneList, OrgDb = org.Hs.eg.db, pAdjustMethod = 'bonferroni', pvalueCutoff = pvalueCutoff,
nPerm = nPerm, verbose = FALSE, seed = TRUE)
GSEA_DO = gseDO(geneList, pAdjustMethod = 'bonferroni', pvalueCutoff = pvalueCutoff,
nPerm = nPerm, verbose = FALSE, seed = TRUE)
GSEA_DGN = gseDGN(geneList, pAdjustMethod = 'bonferroni', pvalueCutoff = pvalueCutoff,
nPerm = nPerm, verbose = FALSE, seed = TRUE)
GSEA_KEGG = gseKEGG(geneList, organism = 'human', pAdjustMethod = 'bonferroni', pvalueCutoff = pvalueCutoff,
nPerm = nPerm, verbose = FALSE, seed = TRUE)
GSEA_Reactome = gsePathway(geneList, organism = 'human', pAdjustMethod = 'bonferroni', pvalueCutoff = pvalueCutoff,
nPerm = nPerm, verbose = FALSE, seed = TRUE)
GSEA_enrichment[[module]] = list('GO' = GSEA_GO, 'DO' = GSEA_DO, 'DGN' = GSEA_DGN, 'KEGG' = GSEA_KEGG,
'Reactome' = GSEA_Reactome)
# Save after each iteration (in case it breaks)
save(GSEA_enrichment, file = file_name)
}
rm(GSEA_dataset, nPerm, geneList, GSEA_GO, GSEA_DO, GSEA_DGN, GSEA_KEGG, GSEA_Reactome)
}
################################################################################################################
# ORA enrichment
# Same caching scheme as for GSEA
file_name = './../Data/preprocessedData/ORA_results_relaxed.RData'
if(file.exists(file_name)){
load(file_name)
} else {
cat('\n\nPerforming ORA\n')
# Prepare input
# Background set: every gene kept in EA_dataset
universe = EA_dataset$entrezgene %>% as.character
# Perform Enrichment
ORA_enrichment = list()
for(module in selected_modules){
cat(paste0('\nModule: ', which(selected_modules == module), '/', length(selected_modules)))
genes_in_module = EA_dataset %>% filter(Module == module) %>% pull(entrezgene)
# qvalueCutoff = 1 in every call, so only pvalueCutoff restricts the reported terms
ORA_GO = enrichGO(gene = genes_in_module, universe = universe, OrgDb = org.Hs.eg.db, ont = 'All',
pAdjustMethod = 'bonferroni', pvalueCutoff = pvalueCutoff, qvalueCutoff = 1)
ORA_DO = enrichDO(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
pAdjustMethod = 'bonferroni', pvalueCutoff = pvalueCutoff)
ORA_DGN = enrichDGN(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
pAdjustMethod = 'bonferroni', pvalueCutoff = pvalueCutoff)
ORA_KEGG = enrichKEGG(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
pAdjustMethod = 'bonferroni', pvalueCutoff = pvalueCutoff)
ORA_Reactome = enrichPathway(gene = genes_in_module, universe = universe, qvalueCutoff = 1,
pAdjustMethod = 'bonferroni', pvalueCutoff = pvalueCutoff)
ORA_enrichment[[module]] = list('GO' = ORA_GO, 'DO' = ORA_DO, 'DGN' = ORA_DGN, 'KEGG' = ORA_KEGG,
'Reactome' = ORA_Reactome)
# Save after each iteration (in case it breaks)
save(ORA_enrichment, file = file_name)
}
rm(universe, genes_in_module, module, ORA_GO, ORA_DGN, ORA_DO, ORA_KEGG, ORA_Reactome)
}
################################################################################################################
# Get shared enrichment for each module
# Result: selected_modules_enrichment[[module]][[database]] is a data frame with the terms found by both methods
selected_modules_enrichment = list()
for(module in selected_modules){
module_enrichment = list()
GSEA_enrichment_for_module = GSEA_enrichment[[module]]
ORA_enrichment_for_module = ORA_enrichment[[module]]
for(dataset in c('KEGG', 'Reactome', 'GO', 'DO', 'DGN')){
# Suffix the p-value columns with the method name so they remain distinguishable after the join
# (the GSEA results call the column 'qvalues', the ORA results 'qvalue')
GSEA_enrichment_dataset = GSEA_enrichment_for_module[[dataset]] %>% data.frame %>%
dplyr::rename('pvalue_GSEA' = pvalue, 'p.adjust_GSEA' = p.adjust, 'qvalues_GSEA' = qvalues)
ORA_enrichment_dataset = ORA_enrichment_for_module[[dataset]] %>% data.frame %>%
dplyr::rename('pvalue_ORA' = pvalue, 'p.adjust_ORA' = p.adjust, 'qvalue_ORA' = qvalue)
# Get shared enrichments (if any)
# inner_join keeps only the term IDs present in both results; other columns present in both inputs
# get the '.x' (GSEA) / '.y' (ORA) suffix
shared_enrichment_dataset = GSEA_enrichment_dataset %>% inner_join(ORA_enrichment_dataset, by = 'ID')
module_enrichment[[dataset]] = shared_enrichment_dataset
}
selected_modules_enrichment[[module]] = module_enrichment
}
# Cache the shared results; this is the file whose existence is checked at the top of this block
save(selected_modules_enrichment, file = './../Data/preprocessedData/top_modules_enrichment_relaxed.RData')
rm(module, module_enrichment, GSEA_enrichment_for_module, ORA_enrichment_for_module, dataset,
GSEA_enrichment_dataset, ORA_enrichment_dataset, shared_enrichment_dataset)
}
Relaxing the p-value threshold only worked for one of the modules; the other still has zero terms in common between the GSEA and ORA results
# GO terms shared by GSEA and ORA for the second selected module (the one with module_number 22)
compare_methods(GSEA_enrichment, ORA_enrichment, selected_modules_enrichment, selected_modules[2], 'GO')
Enrichment results for cluster 22:
- GSEA has 409 enriched term(s)
- ORA has 10 enriched term(s)
- 1 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| GO:0043409 | negative regulation of MAPK cascade | 0.0598975 | 0.3227047 | 0.0183522 | 6/59 |
Genes involved in all enriched terms: DUSP4, DUSP6, ERRFI1, NCOR1, SPRED2, SPRY2
# Same module, terms from the 'DO' results (computed with gseDO / enrichDO)
compare_methods(GSEA_enrichment, ORA_enrichment, selected_modules_enrichment, selected_modules[2], 'DO')
Enrichment results for cluster 22:
- GSEA has 147 enriched term(s)
- ORA has 146 enriched term(s)
- 0 terms are enriched in both methods
# Same module, terms from the 'DGN' results (computed with gseDGN / enrichDGN)
compare_methods(GSEA_enrichment, ORA_enrichment, selected_modules_enrichment, selected_modules[2], 'DGN')
Enrichment results for cluster 22:
- GSEA has 219 enriched term(s)
- ORA has 711 enriched term(s)
- 1 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| umls:C0740392 | Infarction, Middle Cerebral Artery | 0.3033946 | 0.3073569 | 0.1428374 | 4/58 |
Genes involved in all enriched terms: ARC, EGR1, EGR2, EGR4
# Same module, terms from the 'KEGG' results (computed with gseKEGG / enrichKEGG)
compare_methods(GSEA_enrichment, ORA_enrichment, selected_modules_enrichment, selected_modules[2], 'KEGG')
Enrichment results for cluster 22:
- GSEA has 64 enriched term(s)
- ORA has 60 enriched term(s)
- 1 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| hsa05202 | Transcriptional misregulation in cancer | 0.4564711 | 0.0045094 | 0.4484628 | 4/32 |
Genes involved in all enriched terms: DUSP6, NCOR1, NR4A3, PER2
# Same module, terms from the 'Reactome' results (computed with gsePathway / enrichPathway)
compare_methods(GSEA_enrichment, ORA_enrichment, selected_modules_enrichment, selected_modules[2], 'Reactome')
Enrichment results for cluster 22:
- GSEA has 137 enriched term(s)
- ORA has 312 enriched term(s)
- 2 terms are enriched in both methods
| ID | Description | p.adjust_ORA | p.adjust_GSEA | qvalue_ORA | GeneRatio |
|---|---|---|---|---|---|
| R-HSA-3247509 | Chromatin modifying enzymes | 0.456008 | 0.0171148 | 0.0946049 | 6/43 |
| R-HSA-4839726 | Chromatin organization | 0.456008 | 0.0171148 | 0.0946049 | 6/43 |
Genes involved in all enriched terms: KDM5B, MTA2, NCOA2, NCOR1, TRRAP, WHSC1L1
Plots of the results when there are more than 5 terms in common between methods:
# Reactome plots for the second selected module (module_number 22), using the relaxed-threshold shared results
plot_results(selected_modules_enrichment, selected_modules[2], 'Reactome')
plot_shared_genes(selected_modules_enrichment, selected_modules[2], 'Reactome')
Note: I am using a corrected p-value threshold of 0.05 here, since the threshold was only relaxed because we were combining these results with the ones from the ORA
# Load the GSEA and ORA results stored under the non-relaxed file names
# NOTE(review): the print_*/plot_* helpers that follow read the globals GSEA_enrichment and ORA_enrichment;
# presumably these files define objects with those names, replacing the relaxed versions used above -- confirm
load('./../Data/preprocessedData/GSEA_results.RData')
load('./../Data/preprocessedData/ORA_results.RData')
# Print, for each annotation database, the GSEA terms with the highest positive enrichment score
#
# module: name of the module; used to index the global list GSEA_enrichment
# n:      maximum number of terms to print per database
#
# Only terms with p.adjust < 0.05 and NES > 0 are considered. For each database a kable table is printed,
# or a 'No enriched terms found' message when no term passes the filter.
print_GSEA_top_results = function(module, n){

  for(database in c('GO','DO','DGN','KEGG','Reactome')){

    # slice_head() after sorting returns at most n rows (top_n() can return more when there are ties) and
    # selects exactly the same terms as plot_shared_genes_GSEA
    res = GSEA_enrichment[[module]][[database]]@result %>%
          filter(p.adjust<0.05 & NES>0) %>%
          dplyr::select(ID, Description, NES, p.adjust, qvalues) %>%
          arrange(desc(NES)) %>%
          slice_head(n = n)

    cat(paste0('\n',database,':\n'))

    if(nrow(res)>0){
      # The row names of the result slot repeat the term ID, so they are left out of the table
      print(res %>% kable(row.names = FALSE) %>% kable_styling(full_width = FALSE))
      #print(xtable(res, display =c('s','s','s','f','e','e')), include.rownames=FALSE) # thesis
    } else {
      cat('\nNo enriched terms found\n\n\n')
    }
  }
}
# Plot how many core-enrichment genes the top GSEA terms of a module have in common
#
# module: name of the module; used to index the global list GSEA_enrichment
# n:      maximum number of terms (highest NES first) to include per database
#
# For each database with at least two terms passing p.adjust < 0.05 and NES > 0, draws a corrplot of the
# pairwise overlap: shared genes divided by the number of distinct genes in both terms.
plot_shared_genes_GSEA = function(module, n){

  for(database in c('GO','DO','DGN','KEGG','Reactome')){

    significant_terms = GSEA_enrichment[[module]][[database]]@result %>% filter(p.adjust<0.05 & NES>0)
    top_terms = significant_terms %>% arrange(desc(NES)) %>% dplyr::select(ID, core_enrichment) %>%
                slice_head(n=n)
    n_terms = nrow(top_terms)

    # A pairwise comparison needs at least two terms
    if(n_terms <= 1) next

    # core_enrichment stores the genes of each term as a single '/'-separated string
    core_genes = strsplit(top_terms$core_enrichment, '/')

    overlap = matrix(0, n_terms, n_terms)
    for(first in 1:(n_terms-1)){
      for(second in (first+1):n_terms){
        genes_first = core_genes[[first]]
        genes_second = core_genes[[second]]
        overlap[first,second] = sum(genes_first %in% genes_second)/length(unique(c(genes_first, genes_second)))
        overlap[second,first] = overlap[first,second]
      }
    }
    rownames(overlap) = top_terms$ID
    colnames(overlap) = top_terms$ID

    cluster_number = genes_info$module_number[genes_info$Module==module][1]
    plot_title = paste0('Genes in common in the ',database, ' database for cluster ', cluster_number)

    corrplot(overlap, type = 'lower', method = 'square', diag = FALSE, number.digits = 2, cl.pos = 'n',
             tl.pos = 'ld', tl.col = '#666666', order = 'hclust', col.lim = c(0,1), addCoef.col = 'black',
             mar = c(0,0,2,0), tl.cex = 0.8, number.cex= 0.8, title = plot_title)
  }
}
# Print, for each annotation database, the most significant ORA terms of a module
#
# module: name of the module; used to index the global list ORA_enrichment
# n:      maximum number of terms to print per database
#
# Only terms with p.adjust < 0.05 are considered. For each database a kable table is printed, or a
# 'No enriched terms found' message when no term passes the filter.
print_ORA_top_results = function(module, n){

  for(database in c('GO','DO','DGN','KEGG','Reactome')){

    # Sort by increasing adjusted p-value and keep the first n rows.
    # top_n(n, wt = p.adjust) would keep the n LARGEST p-values, i.e. the least significant terms.
    res = ORA_enrichment[[module]][[database]]@result %>%
          filter(p.adjust<0.05) %>%
          dplyr::select(ID, Description, p.adjust, qvalue, GeneRatio) %>%
          arrange(p.adjust) %>%
          slice_head(n = n)

    cat(paste0('\n',database,':\n'))

    if(nrow(res)>0){
      # The row names of the result slot repeat the term ID, so they are left out of the table
      print(res %>% kable(row.names = FALSE) %>% kable_styling(full_width = FALSE))
      #print(xtable(res, display =c('s','s','s','e','e','s')), include.rownames=FALSE) # thesis
    } else {
      cat('\nNo enriched terms found\n\n\n')
    }
  }
}
# Plot how many genes the most significant ORA terms of a module have in common
#
# module: name of the module; used to index the global list ORA_enrichment
# n:      maximum number of terms (lowest adjusted p-value first) to include per database
#
# For each database with at least two terms passing p.adjust < 0.05, draws a corrplot of the pairwise
# overlap: shared genes divided by the number of distinct genes in both terms.
plot_shared_genes_ORA = function(module, n){

  for(database in c('GO','DO','DGN','KEGG','Reactome')){

    # Most significant terms first (ascending adjusted p-value)
    plot_data = ORA_enrichment[[module]][[database]]@result %>% filter(p.adjust<0.05) %>%
                arrange(p.adjust) %>% dplyr::select(ID, geneID) %>% slice_head(n=n)

    if(nrow(plot_data)>1){

      # ORA results store the genes of each term in `geneID` as a single '/'-separated string
      # (`core_enrichment` only exists in GSEA results)
      gene_sets = strsplit(as.character(plot_data$geneID), '/')

      shared_genes = matrix(0, nrow(plot_data), nrow(plot_data))

      for(i in 1:(nrow(plot_data)-1)){
        for(j in (i+1):nrow(plot_data)){
          gene_set_1 = gene_sets[[i]]
          gene_set_2 = gene_sets[[j]]
          shared_genes[i,j] = sum(gene_set_1 %in% gene_set_2)/length(unique(c(gene_set_1, gene_set_2)))
          shared_genes[j,i] = shared_genes[i,j]
        }
      }
      rownames(shared_genes) = plot_data$ID
      colnames(shared_genes) = plot_data$ID

      corrplot(shared_genes, type = 'lower', method = 'square', diag = FALSE, number.digits = 2, cl.pos = 'n',
               tl.pos = 'ld', tl.col = '#666666', order = 'hclust', col.lim = c(0,1), addCoef.col = 'black',
               mar = c(0,0,2,0), tl.cex = 0.8, number.cex= 0.8,
               title = paste0('Genes in common in the ',database, ' database for cluster ',
                              genes_info$module_number[genes_info$Module==module][1]))
    }
  }
}
# Top 5 positively enriched GSEA terms per database for the first selected module (module_number 20)
print_GSEA_top_results(selected_modules[1], 5)
GO:
No enriched terms found
DO:| ID | Description | NES | p.adjust | qvalues | |
|---|---|---|---|---|---|
| DOID:1826 | DOID:1826 | epilepsy syndrome | 2.149075 | 0.0136879 | 0.0002301 |
DGN:
No enriched terms found
KEGG:| ID | Description | NES | p.adjust | qvalues | |
|---|---|---|---|---|---|
| hsa00190 | hsa00190 | Oxidative phosphorylation | 2.857289 | 0.0059279 | 0.0002276 |
| hsa05016 | hsa05016 | Huntington disease | 2.613427 | 0.0055894 | 0.0002276 |
| hsa00020 | hsa00020 | Citrate cycle (TCA cycle) | 2.563582 | 0.0063893 | 0.0002276 |
| hsa05012 | hsa05012 | Parkinson disease | 2.471158 | 0.0056574 | 0.0002276 |
| hsa05415 | hsa05415 | Diabetic cardiomyopathy | 2.431780 | 0.0057627 | 0.0002276 |
| ID | Description | NES | p.adjust | qvalues | |
|---|---|---|---|---|---|
| R-HSA-1428517 | R-HSA-1428517 | The citric acid (TCA) cycle and respiratory electron transport | 2.917908 | 0.0235084 | 0.0003226 |
| R-HSA-611105 | R-HSA-611105 | Respiratory electron transport | 2.876693 | 0.0242372 | 0.0003226 |
| R-HSA-163200 | R-HSA-163200 | Respiratory electron transport, ATP synthesis by chemiosmotic coupling, and heat production by uncoupling proteins. | 2.821164 | 0.0240467 | 0.0003226 |
| R-HSA-5576892 | R-HSA-5576892 | Phase 0 - rapid depolarisation | 2.660878 | 0.0252349 | 0.0003226 |
| R-HSA-888590 | R-HSA-888590 | GABA synthesis, release, reuptake and degradation | 2.627300 | 0.0261688 | 0.0003226 |
Plots of the genes shared between the top enriched GSEA terms (only drawn for databases with at least 2 enriched terms):
# Gene overlap between the top 5 GSEA terms of the first selected module (module_number 20)
plot_shared_genes_GSEA(selected_modules[1], 5)
# ORA terms with p.adjust < 0.05 for the same module, per database
print_ORA_top_results(selected_modules[1], 5)
GO:
| ID | Description | p.adjust | qvalue | GeneRatio | |
|---|---|---|---|---|---|
| GO:0070603 | GO:0070603 | SWI/SNF superfamily-type complex | 0.0472059 | 0.0466179 | 10/499 |
DO:
No enriched terms found
DGN:
No enriched terms found
KEGG:
No enriched terms found
Reactome:
No enriched terms found
sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=en_GB.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=en_GB.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] xtable_1.8-4 kableExtra_1.1.0 knitr_1.32
## [4] doParallel_1.0.15 iterators_1.0.13 foreach_1.5.1
## [7] org.Hs.eg.db_3.8.2 AnnotationDbi_1.46.1 IRanges_2.18.3
## [10] S4Vectors_0.22.1 Biobase_2.44.0 BiocGenerics_0.30.0
## [13] DOSE_3.10.2 ReactomePA_1.28.0 clusterProfiler_3.12.0
## [16] biomaRt_2.40.5 polycor_0.7-10 expss_0.10.7
## [19] WGCNA_1.69 fastcluster_1.2.3 dynamicTreeCut_1.63-1
## [22] ggExtra_0.9 ggpubr_0.2.5 magrittr_2.0.1
## [25] GGally_1.5.0 corrplot_0.90 colorspace_2.0-2
## [28] gridExtra_2.3 viridis_0.6.1 viridisLite_0.4.0
## [31] RColorBrewer_1.1-2 dendextend_1.15.1 plotly_4.9.2
## [34] glue_1.4.2 reshape2_1.4.4 forcats_0.5.0
## [37] stringr_1.4.0 dplyr_1.0.1 purrr_0.3.4
## [40] readr_1.3.1 tidyr_1.1.0 tibble_3.1.2
## [43] ggplot2_3.3.5 tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.2 tidyselect_1.1.1
## [3] RSQLite_2.2.0 htmlwidgets_1.5.3
## [5] grid_3.6.3 BiocParallel_1.18.1
## [7] munsell_0.5.0 codetools_0.2-16
## [9] preprocessCore_1.46.0 miniUI_0.1.1.1
## [11] withr_2.4.2 GOSemSim_2.10.0
## [13] highr_0.9 rstudioapi_0.13
## [15] ggsignif_0.6.2 labeling_0.4.2
## [17] urltools_1.7.3 GenomeInfoDbData_1.2.1
## [19] polyclip_1.10-0 bit64_4.0.5
## [21] farver_2.1.0 vctrs_0.3.8
## [23] generics_0.1.0 xfun_0.25
## [25] GenomeInfoDb_1.20.0 R6_2.5.1
## [27] graphlayouts_0.7.0 locfit_1.5-9.4
## [29] DelayedArray_0.10.0 bitops_1.0-7
## [31] cachem_1.0.6 reshape_0.8.8
## [33] fgsea_1.10.1 gridGraphics_0.5-1
## [35] assertthat_0.2.1 promises_1.2.0.1
## [37] scales_1.1.1 ggraph_2.0.3
## [39] nnet_7.3-14 enrichplot_1.4.0
## [41] gtable_0.3.0 tidygraph_1.2.0
## [43] rlang_0.4.11 genefilter_1.66.0
## [45] splines_3.6.3 lazyeval_0.2.2
## [47] acepack_1.4.1 impute_1.58.0
## [49] broom_0.7.0 europepmc_0.4
## [51] checkmate_2.0.0 BiocManager_1.30.16
## [53] yaml_2.2.1 modelr_0.1.6
## [55] crosstalk_1.1.1 backports_1.2.1
## [57] httpuv_1.6.1 qvalue_2.16.0
## [59] Hmisc_4.4-0 tools_3.6.3
## [61] ggplotify_0.1.0 ellipsis_0.3.2
## [63] jquerylib_0.1.4 ggridges_0.5.3
## [65] Rcpp_1.0.7 plyr_1.8.6
## [67] zlibbioc_1.30.0 base64enc_0.1-3
## [69] progress_1.2.2 RCurl_1.98-1.4
## [71] prettyunits_1.1.1 rpart_4.1-15
## [73] cowplot_1.1.1 SummarizedExperiment_1.14.1
## [75] haven_2.2.0 ggrepel_0.9.1
## [77] cluster_2.1.0 fs_1.5.0
## [79] data.table_1.14.0 DO.db_2.9
## [81] reactome.db_1.68.0 triebeard_0.3.0
## [83] reprex_0.3.0 matrixStats_0.60.1
## [85] hms_1.1.0 mime_0.11
## [87] evaluate_0.14 XML_3.99-0.3
## [89] jpeg_0.1-9 readxl_1.3.1
## [91] compiler_3.6.3 crayon_1.4.1
## [93] htmltools_0.5.2 later_1.3.0
## [95] Formula_1.2-4 geneplotter_1.62.0
## [97] lubridate_1.7.10 DBI_1.1.1
## [99] tweenr_1.0.2 dbplyr_1.4.2
## [101] rappdirs_0.3.3 MASS_7.3-53
## [103] Matrix_1.2-18 cli_3.0.1
## [105] igraph_1.2.6 GenomicRanges_1.36.1
## [107] pkgconfig_2.0.3 rvcheck_0.1.8
## [109] foreign_0.8-76 xml2_1.3.2
## [111] annotate_1.62.0 bslib_0.3.0
## [113] XVector_0.24.0 webshot_0.5.2
## [115] rvest_0.3.5 yulab.utils_0.0.2
## [117] digest_0.6.27 graph_1.62.0
## [119] rmarkdown_2.7 cellranger_1.1.0
## [121] fastmatch_1.1-3 htmlTable_1.13.3
## [123] curl_4.3.2 shiny_1.6.0
## [125] graphite_1.30.0 lifecycle_1.0.0
## [127] jsonlite_1.7.2 fansi_0.5.0
## [129] pillar_1.6.2 lattice_0.20-41
## [131] fastmap_1.1.0 httr_1.4.2
## [133] survival_3.2-7 GO.db_3.8.2
## [135] UpSetR_1.4.0 png_0.1-7
## [137] bit_4.0.4 ggforce_0.3.1
## [139] stringi_1.7.4 sass_0.4.0
## [141] blob_1.2.2 DESeq2_1.24.0
## [143] latticeExtra_0.6-29 memoise_2.0.0